video_core: Add basic vertex, index buffer handling and pipeline caching

This commit is contained in:
raphaelthegreat 2024-05-24 23:50:56 +03:00
parent 0eaa7d5859
commit f480d091ce
27 changed files with 506 additions and 174 deletions

2
.gitmodules vendored
View File

@ -50,7 +50,7 @@
[submodule "externals/toml11"] [submodule "externals/toml11"]
path = externals/toml11 path = externals/toml11
url = https://github.com/ToruNiina/toml11.git url = https://github.com/ToruNiina/toml11.git
[submodule "externals/xxHash"] [submodule "externals/xxhash"]
path = externals/xxHash path = externals/xxHash
url = https://github.com/Cyan4973/xxHash.git url = https://github.com/Cyan4973/xxHash.git
[submodule "externals/zydis"] [submodule "externals/zydis"]

View File

@ -74,8 +74,8 @@ add_subdirectory(magic_enum EXCLUDE_FROM_ALL)
add_subdirectory(toml11 EXCLUDE_FROM_ALL) add_subdirectory(toml11 EXCLUDE_FROM_ALL)
# xxHash # xxHash
add_library(xxhash INTERFACE) add_library(xxhash xxhash/xxhash.h xxhash/xxhash.c)
target_include_directories(xxhash INTERFACE xxhash) target_include_directories(xxhash PUBLIC xxhash)
# Zydis # Zydis
option(ZYDIS_BUILD_TOOLS "" OFF) option(ZYDIS_BUILD_TOOLS "" OFF)
@ -92,4 +92,4 @@ endif()
add_subdirectory(sirit EXCLUDE_FROM_ALL) add_subdirectory(sirit EXCLUDE_FROM_ALL)
if (WIN32) if (WIN32)
target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument") target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument")
endif() endif()

View File

@ -7,6 +7,7 @@
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "core/libraries/error_codes.h" #include "core/libraries/error_codes.h"
#include "core/memory.h" #include "core/memory.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Core { namespace Core {
@ -61,6 +62,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M
new_vma.prot = prot; new_vma.prot = prot;
new_vma.name = name; new_vma.name = name;
new_vma.type = type; new_vma.type = type;
if (type == VMAType::Direct) {
MapVulkanMemory(mapped_addr, size);
}
}; };
// When virtual addr is zero let the address space manager pick the address. // When virtual addr is zero let the address space manager pick the address.
@ -103,6 +108,10 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr, ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr,
"Attempting to unmap partially mapped range"); "Attempting to unmap partially mapped range");
if (it->second.type == VMAType::Direct) {
UnmapVulkanMemory(virtual_addr, size);
}
// Mark region as free and attempt to coalesce it with neighbours. // Mark region as free and attempt to coalesce it with neighbours.
auto& vma = it->second; auto& vma = it->second;
vma.type = VMAType::Free; vma.type = VMAType::Free;
@ -114,6 +123,13 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
impl.Unmap(virtual_addr, size); impl.Unmap(virtual_addr, size);
} }
std::pair<vk::Buffer, size_t> MemoryManager::GetVulkanBuffer(VAddr addr) {
    // Locate the mapping that contains addr: the entry with the greatest base
    // address that is <= addr.
    auto it = mapped_memories.upper_bound(addr);
    // Guard BEFORE decrementing: if addr precedes every mapping (or the map is
    // empty), upper_bound returns begin() and std::prev(begin()) is undefined
    // behavior. The original asserted only after the decrement.
    ASSERT_MSG(it != mapped_memories.begin(), "Address is not backed by any vulkan mapping");
    it = std::prev(it);
    ASSERT(it->first <= addr);
    // Return the Vulkan buffer backing the mapping and the byte offset of addr
    // within that buffer.
    return std::make_pair(*it->second.buffer, addr - it->first);
}
VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) {
auto vma_handle = FindVMA(virtual_addr); auto vma_handle = FindVMA(virtual_addr);
ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map");
@ -171,4 +187,81 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) {
return iter; return iter;
} }
void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) {
    // Imports the guest memory range [addr, addr + size) into Vulkan as external
    // host memory and wraps it in a buffer, so the GPU can consume guest data
    // (vertex/index/uniform) without explicit copies.
    const vk::Device device = instance->GetDevice();
    const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties();
    void* host_pointer = reinterpret_cast<void*>(addr);
    const auto host_mem_props = device.getMemoryHostPointerPropertiesEXT(
        vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, host_pointer);
    ASSERT(host_mem_props.memoryTypeBits != 0);

    int mapped_memory_type = -1;
    auto find_mem_type_with_flag = [&](const vk::MemoryPropertyFlags flags) {
        u32 host_mem_types = host_mem_props.memoryTypeBits;
        while (host_mem_types != 0) {
            // Pick the lowest set bit as the candidate memory type.
            mapped_memory_type = std::countr_zero(host_mem_types);
            // Use an unsigned literal: (1 << 31) on a signed int is UB.
            host_mem_types -= (1u << mapped_memory_type);
            if ((memory_props.memoryTypes[mapped_memory_type].propertyFlags & flags) == flags) {
                return;
            }
        }
        mapped_memory_type = -1;
    };

    // First try to find a memory that is both coherent and cached
    find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent |
                            vk::MemoryPropertyFlagBits::eHostCached);
    if (mapped_memory_type == -1) {
        // Then only coherent (lower performance)
        find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent);
    }
    if (mapped_memory_type == -1) {
        // Fall back to the first importable type; rendering may be incorrect.
        LOG_CRITICAL(Render_Vulkan, "No coherent memory available for memory mapping");
        mapped_memory_type = std::countr_zero(host_mem_props.memoryTypeBits);
    }

    const vk::StructureChain alloc_info = {
        vk::MemoryAllocateInfo{
            .allocationSize = size,
            .memoryTypeIndex = static_cast<uint32_t>(mapped_memory_type),
        },
        vk::ImportMemoryHostPointerInfoEXT{
            .handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
            .pHostPointer = host_pointer,
        },
    };

    const auto [it, new_memory] = mapped_memories.try_emplace(addr);
    ASSERT_MSG(new_memory, "Attempting to remap already mapped vulkan memory");

    auto& memory = it->second;
    memory.backing = device.allocateMemoryUnique(alloc_info.get());
    // Record the mapped size; UnmapVulkanMemory asserts buffer_size == size and
    // the original code never assigned it (try_emplace value-initializes it to 0,
    // so the unmap assert would always fire).
    memory.buffer_size = size;

    constexpr vk::BufferUsageFlags MapFlags =
        vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer |
        vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
        vk::BufferUsageFlagBits::eUniformBuffer;
    const vk::StructureChain buffer_info = {
        vk::BufferCreateInfo{
            .size = size,
            .usage = MapFlags,
            .sharingMode = vk::SharingMode::eExclusive,
        },
        vk::ExternalMemoryBufferCreateInfoKHR{
            .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT,
        }};
    memory.buffer = device.createBufferUnique(buffer_info.get());
    device.bindBufferMemory(*memory.buffer, *memory.backing, 0);
}
void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) {
    // Releases the Vulkan objects created for this guest mapping. Erasing the
    // entry destroys the vk::UniqueBuffer and vk::UniqueDeviceMemory it holds.
    const auto it = mapped_memories.find(addr);
    // The range must match an existing mapping exactly; partial unmaps are not
    // supported. NOTE(review): this compares MappedMemory::buffer_size against
    // the requested size — confirm buffer_size is assigned at mapping time.
    ASSERT(it != mapped_memories.end() && it->second.buffer_size == size);
    mapped_memories.erase(it);
}
} // namespace Core } // namespace Core

View File

@ -3,6 +3,7 @@
#pragma once #pragma once
#include <functional>
#include <string_view> #include <string_view>
#include <vector> #include <vector>
#include <boost/icl/split_interval_map.hpp> #include <boost/icl/split_interval_map.hpp>
@ -10,6 +11,11 @@
#include "common/singleton.h" #include "common/singleton.h"
#include "common/types.h" #include "common/types.h"
#include "core/address_space.h" #include "core/address_space.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
}
namespace Core { namespace Core {
@ -86,6 +92,10 @@ public:
explicit MemoryManager(); explicit MemoryManager();
~MemoryManager(); ~MemoryManager();
void SetInstance(const Vulkan::Instance* instance_) {
instance = instance_;
}
PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment, PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
int memory_type); int memory_type);
@ -97,11 +107,9 @@ public:
void UnmapMemory(VAddr virtual_addr, size_t size); void UnmapMemory(VAddr virtual_addr, size_t size);
private: std::pair<vk::Buffer, size_t> GetVulkanBuffer(VAddr addr);
bool HasOverlap(VAddr addr, size_t size) const {
return vma_map.find(addr) != vma_map.end();
}
private:
VMAHandle FindVMA(VAddr target) { VMAHandle FindVMA(VAddr target) {
// Return first the VMA with base >= target. // Return first the VMA with base >= target.
const auto it = vma_map.lower_bound(target); const auto it = vma_map.lower_bound(target);
@ -117,10 +125,22 @@ private:
VMAHandle MergeAdjacent(VMAHandle iter); VMAHandle MergeAdjacent(VMAHandle iter);
void MapVulkanMemory(VAddr addr, size_t size);
void UnmapVulkanMemory(VAddr addr, size_t size);
private: private:
AddressSpace impl; AddressSpace impl;
std::vector<DirectMemoryArea> allocations; std::vector<DirectMemoryArea> allocations;
VMAMap vma_map; VMAMap vma_map;
struct MappedMemory {
vk::UniqueBuffer buffer;
vk::UniqueDeviceMemory backing;
size_t buffer_size;
};
std::map<VAddr, MappedMemory> mapped_memories;
const Vulkan::Instance* instance{};
}; };
using Memory = Common::Singleton<MemoryManager>; using Memory = Common::Singleton<MemoryManager>;

View File

@ -175,12 +175,14 @@ void EmitContext::DefineInputs(const IR::Program& program) {
const Id id{DefineInput(type, input.binding)}; const Id id{DefineInput(type, input.binding)};
Name(id, fmt::format("vs_in_attr{}", input.binding)); Name(id, fmt::format("vs_in_attr{}", input.binding));
input_params[input.binding] = GetAttributeInfo(input.fmt, id); input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id);
} }
break; break;
case Stage::Fragment: case Stage::Fragment:
for (const auto& input : info.ps_inputs) { for (const auto& input : info.ps_inputs) {
if (input.is_default) { if (input.is_default) {
input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value), input_f32, F32[1]}; input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value),
input_f32, F32[1]};
continue; continue;
} }
const IR::Attribute param{IR::Attribute::Param0 + input.param_index}; const IR::Attribute param{IR::Attribute::Param0 + input.param_index};
@ -192,6 +194,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
} }
Name(id, fmt::format("fs_in_attr{}", input.semantic)); Name(id, fmt::format("fs_in_attr{}", input.semantic));
input_params[input.semantic] = {id, input_f32, F32[1], num_components}; input_params[input.semantic] = {id, input_f32, F32[1], num_components};
interfaces.push_back(id);
} }
default: default:
break; break;
@ -212,6 +215,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
const Id id{DefineOutput(F32[num_components], i)}; const Id id{DefineOutput(F32[num_components], i)};
Name(id, fmt::format("out_attr{}", i)); Name(id, fmt::format("out_attr{}", i));
output_params[i] = {id, output_f32, F32[1], num_components}; output_params[i] = {id, output_f32, F32[1], num_components};
interfaces.push_back(id);
} }
break; break;
case Stage::Fragment: case Stage::Fragment:

View File

@ -40,7 +40,7 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
struct VsharpLoad { struct VsharpLoad {
u32 dword_offset{}; u32 dword_offset{};
s32 base_sgpr{}; s32 base_sgpr{};
s32 dst_sgpr{-1}; s32 dst_reg{-1};
}; };
boost::container::static_vector<VsharpLoad, 16> loads; boost::container::static_vector<VsharpLoad, 16> loads;
@ -57,11 +57,13 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
} }
if (inst.inst_class == InstClass::VectorMemBufFmt) { if (inst.inst_class == InstClass::VectorMemBufFmt) {
// SRSRC is in units of 4 SPGRs while SBASE is in pairs of SGPRs
const u32 base_sgpr = inst.src[2].code * 4;
// Find the load instruction that loaded the V# to the SPGR. // Find the load instruction that loaded the V# to the SPGR.
// This is so we can determine its index in the vertex table. // This is so we can determine its index in the vertex table.
const auto it = std::ranges::find_if(loads, [&](VsharpLoad& load) { const auto it = std::ranges::find_if(
return load.dst_sgpr == inst.src[2].code * 4; loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; });
});
auto& attrib = attributes.emplace_back(); auto& attrib = attributes.emplace_back();
attrib.semantic = semantic_index++; attrib.semantic = semantic_index++;
@ -71,7 +73,7 @@ std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
attrib.dword_offset = it->dword_offset; attrib.dword_offset = it->dword_offset;
// Mark load as used. // Mark load as used.
it->dst_sgpr = -1; it->dst_reg = -1;
} }
} }

View File

@ -9,11 +9,11 @@
namespace Shader::Gcn { namespace Shader::Gcn {
struct VertexAttribute { struct VertexAttribute {
u8 semantic; ///< Semantic index of the attribute u8 semantic; ///< Semantic index of the attribute
u8 dest_vgpr; ///< Destination VGPR to load first component u8 dest_vgpr; ///< Destination VGPR to load first component.
u8 num_elements; ///< Number of components to load u8 num_elements; ///< Number of components to load
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
u8 dword_offset; ///< The dword offset of the V# that describes this attribute. u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
}; };
std::vector<VertexAttribute> ParseFetchShader(const u32* code); std::vector<VertexAttribute> ParseFetchShader(const u32* code);

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/exception.h" #include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
@ -103,20 +103,21 @@ void Translator::EmitFetch(const GcnInst& inst) {
// Parse the assembly to generate a list of attributes. // Parse the assembly to generate a list of attributes.
const auto attribs = ParseFetchShader(code); const auto attribs = ParseFetchShader(code);
for (const auto& attrib : attribs) { for (const auto& attrib : attribs) {
IR::VectorReg dst_reg{attrib.dest_vgpr};
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr};
for (u32 i = 0; i < attrib.num_elements; i++) { for (u32 i = 0; i < attrib.num_elements; i++) {
ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i)); ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i));
} }
// Read the V# of the attribute to figure out component number and type. // Read the V# of the attribute to figure out component number and type.
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
attrib.dword_offset);
const u32 num_components = AmdGpu::NumComponents(buffer.data_format); const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
info.vs_inputs.push_back({ info.vs_inputs.push_back({
.fmt = buffer.num_format, .fmt = buffer.num_format,
.binding = attrib.semantic, .binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components), .num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
}); });
} }
} }

View File

@ -12,8 +12,6 @@
namespace Shader::IR { namespace Shader::IR {
struct Program { struct Program {
explicit Program(const Info&& info_) : info{info_} {}
AbstractSyntaxList syntax_list; AbstractSyntaxList syntax_list;
BlockList blocks; BlockList blocks;
BlockList post_order_blocks; BlockList post_order_blocks;

View File

@ -44,7 +44,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
file.close(); file.close();
// Decode and save instructions // Decode and save instructions
IR::Program program{std::move(info)}; IR::Program program;
program.ins_list.reserve(token.size()); program.ins_list.reserve(token.size());
while (!slice.atEnd()) { while (!slice.atEnd()) {
program.ins_list.emplace_back(decoder.decodeInstruction(slice)); program.ins_list.emplace_back(decoder.decodeInstruction(slice));
@ -55,6 +55,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
Gcn::CFG cfg{gcn_block_pool, program.ins_list}; Gcn::CFG cfg{gcn_block_pool, program.ins_list};
// Structurize control flow graph and create program. // Structurize control flow graph and create program.
program.info = std::move(info);
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info); program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info);
program.blocks = GenerateBlocks(program.syntax_list); program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());

View File

@ -4,8 +4,8 @@
#pragma once #pragma once
#include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/object_pool.h"
#include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/program.h"
#include "shader_recompiler/object_pool.h"
namespace Shader { namespace Shader {
@ -30,7 +30,6 @@ struct BinaryInfo {
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, ObjectPool<IR::Block>& block_pool,
std::span<const u32> code, std::span<const u32> code, const Info&& info);
const Info&& info);
} // namespace Shader } // namespace Shader

View File

@ -40,12 +40,12 @@ enum class TextureType : u32 {
constexpr u32 NUM_TEXTURE_TYPES = 7; constexpr u32 NUM_TEXTURE_TYPES = 7;
struct Info { struct Info {
explicit Info(std::span<const u32, 16> user_data_) : user_data{user_data_} {}
struct VsInput { struct VsInput {
AmdGpu::NumberFormat fmt; AmdGpu::NumberFormat fmt;
u16 binding; u16 binding;
u16 num_components; u16 num_components;
u8 sgpr_base;
u8 dword_offset;
}; };
boost::container::static_vector<VsInput, 32> vs_inputs{}; boost::container::static_vector<VsInput, 32> vs_inputs{};
@ -60,29 +60,33 @@ struct Info {
struct AttributeFlags { struct AttributeFlags {
bool Get(IR::Attribute attrib, u32 comp = 0) const { bool Get(IR::Attribute attrib, u32 comp = 0) const {
return flags[static_cast<size_t>(attrib)] & (1 << comp); return flags[Index(attrib)] & (1 << comp);
} }
bool GetAny(IR::Attribute attrib) const { bool GetAny(IR::Attribute attrib) const {
return flags[static_cast<size_t>(attrib)]; return flags[Index(attrib)];
} }
void Set(IR::Attribute attrib, u32 comp = 0) { void Set(IR::Attribute attrib, u32 comp = 0) {
flags[static_cast<size_t>(attrib)] |= (1 << comp); flags[Index(attrib)] |= (1 << comp);
} }
u32 NumComponents(IR::Attribute attrib) const { u32 NumComponents(IR::Attribute attrib) const {
const u8 mask = flags[static_cast<size_t>(attrib)]; const u8 mask = flags[Index(attrib)];
ASSERT(mask != 0b1011 || mask != 0b1101); ASSERT(mask != 0b1011 || mask != 0b1101);
return std::popcount(mask); return std::popcount(mask);
} }
static size_t Index(IR::Attribute attrib) {
return static_cast<size_t>(attrib);
}
std::array<u8, IR::NumAttributes> flags; std::array<u8, IR::NumAttributes> flags;
}; };
AttributeFlags loads{}; AttributeFlags loads{};
AttributeFlags stores{}; AttributeFlags stores{};
std::span<const u32, 16> user_data; std::span<const u32> user_data;
Stage stage; Stage stage;
template <typename T> template <typename T>

View File

@ -114,7 +114,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
regs.num_indices = draw_index->index_count; regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator; regs.draw_initiator = draw_index->draw_initiator;
if (rasterizer) { if (rasterizer) {
rasterizer->DrawIndex(); rasterizer->Draw(true);
} }
break; break;
} }
@ -122,7 +122,9 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) {
const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header); const auto* draw_index = reinterpret_cast<const PM4CmdDrawIndexAuto*>(header);
regs.num_indices = draw_index->index_count; regs.num_indices = draw_index->index_count;
regs.draw_initiator = draw_index->draw_initiator; regs.draw_initiator = draw_index->draw_initiator;
rasterizer->DrawIndex(); if (rasterizer) {
rasterizer->Draw(false);
}
break; break;
} }
case PM4ItOpcode::DispatchDirect: { case PM4ItOpcode::DispatchDirect: {

View File

@ -180,25 +180,6 @@ struct Liverpool {
BitField<31, 1, u32> disable_color_writes_on_depth_pass; BitField<31, 1, u32> disable_color_writes_on_depth_pass;
}; };
union DepthSize {
u32 raw;
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
u32 Pitch() const {
return (pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (height_tile_max + 1) << 3;
}
};
union DepthSlice {
u32 raw;
BitField<0, 22, u32> slice_tile_max;
};
enum class StencilFunc : u32 { enum class StencilFunc : u32 {
Keep = 0, Keep = 0,
Zero = 1, Zero = 1,
@ -236,9 +217,45 @@ struct Liverpool {
BitField<24, 8, u32> stencil_op_val; BitField<24, 8, u32> stencil_op_val;
}; };
union StencilInfo { struct DepthBuffer {
u32 raw; enum class ZFormat : u32 {
BitField<0, 1, u32> format; Invald = 0,
Z16 = 1,
Z32Float = 2,
};
enum class StencilFormat : u32 {
Invalid = 0,
Stencil8 = 1,
};
union {
BitField<0, 2, ZFormat> format;
BitField<2, 2, u32> num_samples;
BitField<13, 3, u32> tile_split;
} z_info;
union {
BitField<0, 1, StencilFormat> format;
} stencil_info;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
union {
BitField<0, 11, u32> pitch_tile_max;
BitField<11, 11, u32> height_tile_max;
} depth_size;
union {
BitField<0, 22, u32> tile_max;
} depth_slice;
u32 Pitch() const {
return (depth_size.pitch_tile_max + 1) << 3;
}
u32 Height() const {
return (depth_size.height_tile_max + 1) << 3;
}
}; };
enum class ClipSpace : u32 { enum class ClipSpace : u32 {
@ -505,6 +522,12 @@ struct Liverpool {
u64 CmaskAddress() const { u64 CmaskAddress() const {
return u64(cmask_base_address) << 8; return u64(cmask_base_address) << 8;
} }
NumberFormat NumFormat() const {
    // There is a small difference between T# and CB number types, account for it.
    if (info.number_type == AmdGpu::NumberFormat::Uscaled) {
        return AmdGpu::NumberFormat::Srgb;
    }
    return info.number_type;
}
}; };
enum class PrimitiveType : u32 { enum class PrimitiveType : u32 {
@ -539,14 +562,8 @@ struct Liverpool {
u32 stencil_clear; u32 stencil_clear;
u32 depth_clear; u32 depth_clear;
Scissor screen_scissor; Scissor screen_scissor;
INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2); INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2);
StencilInfo stencil_info; DepthBuffer depth_buffer;
u32 z_read_base;
u32 stencil_read_base;
u32 z_write_base;
u32 stencil_write_base;
DepthSize depth_size;
DepthSlice depth_slice;
INSERT_PADDING_WORDS(0xA08E - 0xA018); INSERT_PADDING_WORDS(0xA08E - 0xA018);
ColorBufferMask color_target_mask; ColorBufferMask color_target_mask;
ColorBufferMask color_shader_mask; ColorBufferMask color_shader_mask;
@ -595,6 +612,17 @@ struct Liverpool {
VgtNumInstances num_instances; VgtNumInstances num_instances;
}; };
std::array<u32, NumRegs> reg_array{}; std::array<u32, NumRegs> reg_array{};
const ShaderProgram* ProgramForStage(u32 index) const {
    // Only the vertex (0) and pixel (4) hardware stages are wired up; any
    // other stage index has no register block yet.
    if (index == 0) {
        return &vs_program;
    }
    if (index == 4) {
        return &ps_program;
    }
    return nullptr;
}
}; };
Regs regs{}; Regs regs{};
@ -635,7 +663,7 @@ static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48); static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48);
static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017); static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017);
static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E);
static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F);
static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);

View File

@ -76,4 +76,3 @@ struct fmt::formatter<AmdGpu::NumberFormat> {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt)); return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt));
} }
}; };

View File

@ -1,6 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include "common/assert.h" #include "common/assert.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h"
@ -114,19 +114,41 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) {
} }
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && num_format == AmdGpu::NumberFormat::Float) { if (data_format == AmdGpu::DataFormat::Format32_32_32_32 &&
num_format == AmdGpu::NumberFormat::Float) {
return vk::Format::eR32G32B32A32Sfloat; return vk::Format::eR32G32B32A32Sfloat;
} }
if (data_format == AmdGpu::DataFormat::Format32_32_32 && num_format == AmdGpu::NumberFormat::Uint) { if (data_format == AmdGpu::DataFormat::Format32_32_32 &&
num_format == AmdGpu::NumberFormat::Uint) {
return vk::Format::eR32G32B32Uint; return vk::Format::eR32G32B32Uint;
} }
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Unorm) { if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8B8A8Unorm; return vk::Format::eR8G8B8A8Unorm;
} }
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Srgb) { if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
num_format == AmdGpu::NumberFormat::Srgb) {
return vk::Format::eR8G8B8A8Srgb; return vk::Format::eR8G8B8A8Srgb;
} }
UNREACHABLE(); UNREACHABLE();
} }
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
                       Liverpool::DepthBuffer::StencilFormat stencil_format) {
    // Translation of guest depth/stencil formats to Vulkan is not implemented
    // yet: any call aborts, signalling a game uses an unsupported depth buffer.
    UNREACHABLE();
}
void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
    // Expands a quad list into a 16-bit triangle-list index stream: each quad
    // (v, v+1, v+2, v+3) emits the two triangles (v, v+1, v+2) and (v+2, v, v+3).
    // out_ptr must have room for (num_vertices / 4) * 6 u16 indices.
    static constexpr u16 NumVerticesPerQuad = 4;
    u16* out_data = reinterpret_cast<u16*>(out_ptr);
    // Iterate with a u32 counter: the original u16 counter wraps around and
    // loops forever when num_vertices >= 0x10000. The emitted indices remain
    // 16-bit, matching the index buffer format.
    for (u32 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
        const u16 v = static_cast<u16>(i);
        *out_data++ = v;
        *out_data++ = v + 1;
        *out_data++ = v + 2;
        *out_data++ = v + 2;
        *out_data++ = v;
        *out_data++ = v + 3;
    }
}
} // namespace Vulkan::LiverpoolToVK } // namespace Vulkan::LiverpoolToVK

View File

@ -23,4 +23,9 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode);
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format);
vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format,
Liverpool::DepthBuffer::StencilFormat stencil_format);
void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices);
} // namespace Vulkan::LiverpoolToVK } // namespace Vulkan::LiverpoolToVK

View File

@ -4,22 +4,58 @@
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "core/memory.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan { namespace Vulkan {
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& key_, GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_, const PipelineKey& key_, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules) std::array<vk::ShaderModule, MaxShaderStages> modules)
: instance{instance_}, pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, key{key_} { : instance{instance_}, scheduler{scheduler_}, key{key_} {
const vk::Device device = instance.GetDevice(); const vk::Device device = instance.GetDevice();
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!infos[i]) {
continue;
}
stages[i] = *infos[i];
}
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
.pSetLayouts = nullptr,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
boost::container::static_vector<vk::VertexInputBindingDescription, 32> bindings;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
const auto& vs_info = stages[0];
for (const auto& input : vs_info.vs_inputs) {
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
attributes.push_back({
.location = input.binding,
.binding = input.binding,
.format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format),
.offset = 0,
});
bindings.push_back({
.binding = input.binding,
.stride = u32(buffer.stride),
.inputRate = vk::VertexInputRate::eVertex,
});
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = 0U, .vertexBindingDescriptionCount = static_cast<u32>(bindings.size()),
.pVertexBindingDescriptions = nullptr, .pVertexBindingDescriptions = bindings.data(),
.vertexAttributeDescriptionCount = 0U, .vertexAttributeDescriptionCount = static_cast<u32>(attributes.size()),
.pVertexAttributeDescriptions = nullptr, .pVertexAttributeDescriptions = attributes.data(),
}; };
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
@ -126,11 +162,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pName = "main", .pName = "main",
}; };
const vk::Format color_format = vk::Format::eR8G8B8A8Srgb; const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined);
const u32 num_color_formats = std::distance(key.color_formats.begin(), it);
const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = { const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = {
.colorAttachmentCount = 1, .colorAttachmentCount = num_color_formats,
.pColorAttachmentFormats = &color_format, .pColorAttachmentFormats = key.color_formats.data(),
.depthAttachmentFormat = vk::Format::eUndefined, .depthAttachmentFormat = key.depth.depth_enable ? key.depth_format : vk::Format::eUndefined,
.stencilAttachmentFormat = vk::Format::eUndefined, .stencilAttachmentFormat = vk::Format::eUndefined,
}; };
@ -146,7 +183,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
.pDepthStencilState = &depth_info, .pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending, .pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info, .pDynamicState = &dynamic_info,
.layout = pipeline_layout, .layout = *pipeline_layout,
}; };
auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info); auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info);
@ -159,4 +196,20 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey&
GraphicsPipeline::~GraphicsPipeline() = default; GraphicsPipeline::~GraphicsPipeline() = default;
void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const {
std::array<vk::Buffer, MaxVertexBufferCount> buffers;
std::array<vk::DeviceSize, MaxVertexBufferCount> offsets;
const auto& vs_info = stages[0];
const size_t num_buffers = vs_info.vs_inputs.size();
for (u32 i = 0; i < num_buffers; ++i) {
const auto& input = vs_info.vs_inputs[i];
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address);
}
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data());
}
} // namespace Vulkan } // namespace Vulkan

View File

@ -1,19 +1,31 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <xxhash.h>
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
namespace Core {
class MemoryManager;
}
namespace Vulkan { namespace Vulkan {
static constexpr u32 MaxVertexBufferCount = 32;
static constexpr u32 MaxShaderStages = 5; static constexpr u32 MaxShaderStages = 5;
class Instance; class Instance;
class Scheduler;
using Liverpool = AmdGpu::Liverpool; using Liverpool = AmdGpu::Liverpool;
struct PipelineKey { struct PipelineKey {
std::array<size_t, MaxShaderStages> stage_hashes;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
vk::Format depth_format;
Liverpool::DepthControl depth; Liverpool::DepthControl depth;
Liverpool::StencilControl stencil; Liverpool::StencilControl stencil;
Liverpool::StencilRefMask stencil_ref_front; Liverpool::StencilRefMask stencil_ref_front;
@ -21,26 +33,41 @@ struct PipelineKey {
Liverpool::PrimitiveType prim_type; Liverpool::PrimitiveType prim_type;
Liverpool::PolygonMode polygon_mode; Liverpool::PolygonMode polygon_mode;
Liverpool::CullMode cull_mode; Liverpool::CullMode cull_mode;
bool operator==(const PipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(PipelineKey)) == 0;
}
}; };
static_assert(std::has_unique_object_representations_v<PipelineKey>); static_assert(std::has_unique_object_representations_v<PipelineKey>);
class GraphicsPipeline { class GraphicsPipeline {
public: public:
explicit GraphicsPipeline(const Instance& instance, const PipelineKey& key, explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
vk::PipelineCache pipeline_cache, vk::PipelineLayout layout, const PipelineKey& key, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules); std::array<vk::ShaderModule, MaxShaderStages> modules);
~GraphicsPipeline(); ~GraphicsPipeline();
void BindResources(Core::MemoryManager* memory) const;
[[nodiscard]] vk::Pipeline Handle() const noexcept { [[nodiscard]] vk::Pipeline Handle() const noexcept {
return *pipeline; return *pipeline;
} }
private: private:
const Instance& instance; const Instance& instance;
Scheduler& scheduler;
vk::UniquePipeline pipeline; vk::UniquePipeline pipeline;
vk::PipelineLayout pipeline_layout; vk::UniquePipelineLayout pipeline_layout;
vk::PipelineCache pipeline_cache; std::array<Shader::Info, MaxShaderStages> stages;
PipelineKey key; PipelineKey key;
}; };
} // namespace Vulkan } // namespace Vulkan
template <>
struct std::hash<Vulkan::PipelineKey> {
std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept {
return XXH3_64bits(&key, sizeof(key));
}
};

View File

@ -271,11 +271,11 @@ void Instance::CollectDeviceParameters() {
const std::string api_version = GetReadableVersion(properties.apiVersion); const std::string api_version = GetReadableVersion(properties.apiVersion);
const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", ")); const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", "));
LOG_INFO(Render_Vulkan, "GPU_Vendor", vendor_name); LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name);
LOG_INFO(Render_Vulkan, "GPU_Model", model_name); LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver", driver_name); LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version", api_version); LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version);
LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions", extensions); LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions);
} }
void Instance::CollectToolingInfo() { void Instance::CollectToolingInfo() {

View File

@ -2,10 +2,10 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <fstream> #include <fstream>
#include "common/scope_exit.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/recompiler.h" #include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
@ -14,8 +14,9 @@
namespace Vulkan { namespace Vulkan {
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data, Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
AmdGpu::Liverpool::Regs& regs) { const AmdGpu::Liverpool::Regs& regs) {
Shader::Info info{user_data}; Shader::Info info{};
info.user_data = user_data;
info.stage = stage; info.stage = stage;
switch (stage) { switch (stage) {
case Shader::Stage::Fragment: { case Shader::Stage::Fragment: {
@ -39,66 +40,96 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_) AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192}, : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
block_pool{512} { block_pool{512} {
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
.pSetLayouts = nullptr,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
} }
void PipelineCache::BindPipeline() { const GraphicsPipeline* PipelineCache::GetPipeline() {
SCOPE_EXIT { RefreshKey();
const auto cmdbuf = scheduler.CommandBuffer(); const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); if (is_new) {
}; it.value() = CreatePipeline();
}
const GraphicsPipeline* pipeline = it->second.get();
return pipeline;
}
if (pipeline) { void PipelineCache::RefreshKey() {
return; auto& regs = liverpool->regs;
auto& key = graphics_key;
key.depth = regs.depth_control;
key.stencil = regs.stencil_control;
key.stencil_ref_front = regs.stencil_ref_front;
key.stencil_ref_back = regs.stencil_ref_back;
key.prim_type = regs.primitive_type;
key.polygon_mode = regs.polygon_control.PolyMode();
const auto& db = regs.depth_buffer;
key.depth_format = key.depth.depth_enable
? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format)
: vk::Format::eUndefined;
for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) {
const auto& cb = regs.color_buffers[i];
key.color_formats[i] = cb.base_address
? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat())
: vk::Format::eUndefined;
} }
const auto get_program = [&](const AmdGpu::Liverpool::ShaderProgram& pgm, Shader::Stage stage) { for (u32 i = 0; i < MaxShaderStages; i++) {
const u32* token = pgm.Address<u32>(); auto* pgm = regs.ProgramForStage(i);
if (!pgm || !pgm->Address<u32>()) {
key.stage_hashes[i] = 0;
continue;
}
const u32* code = pgm->Address<u32>();
// Retrieve shader header.
Shader::BinaryInfo bininfo; Shader::BinaryInfo bininfo;
std::memcpy(&bininfo, token + (token[1] + 1) * 2, sizeof(bininfo)); std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
key.stage_hashes[i] = bininfo.shader_hash;
}
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreatePipeline() {
const auto& regs = liverpool->regs;
std::array<Shader::IR::Program, MaxShaderStages> programs;
std::array<const Shader::Info*, MaxShaderStages> infos{};
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!graphics_key.stage_hashes[i]) {
stages[i] = VK_NULL_HANDLE;
continue;
}
auto* pgm = regs.ProgramForStage(i);
const u32* code = pgm->Address<u32>();
Shader::BinaryInfo bininfo;
std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo));
const u32 num_dwords = bininfo.length / sizeof(u32);
// Lookup if the shader already exists.
const auto it = module_map.find(bininfo.shader_hash); const auto it = module_map.find(bininfo.shader_hash);
if (it != module_map.end()) { if (it != module_map.end()) {
return *it->second; stages[i] = *it->second;
continue;
} }
// Compile and cache shader.
const auto data = std::span{token, bininfo.length / sizeof(u32)};
block_pool.ReleaseContents(); block_pool.ReleaseContents();
inst_pool.ReleaseContents(); inst_pool.ReleaseContents();
const auto info = MakeShaderInfo(stage, pgm.user_data, liverpool->regs);
auto program = Shader::TranslateProgram(inst_pool, block_pool, data, std::move(info));
const auto code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, program);
static int counter = 0; // Recompile shader to IR.
std::ofstream file(fmt::format("shader{}.spv", counter++), std::ios::out | std::ios::binary); const auto stage = Shader::Stage{i};
file.write((const char*)code.data(), code.size() * sizeof(u32)); const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
file.close(); programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords},
std::move(info));
return CompileSPV(code, instance.GetDevice()); // Compile IR to SPIR-V
}; const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]);
stages[i] = CompileSPV(spv_code, instance.GetDevice());
infos[i] = &programs[i].info;
}
// Retrieve shader stage modules. return std::make_unique<GraphicsPipeline>(instance, scheduler, graphics_key, *pipeline_cache,
// TODO: Only do this when program address is changed. infos, stages);
stages[0] = get_program(liverpool->regs.vs_program, Shader::Stage::Vertex);
stages[4] = get_program(liverpool->regs.ps_program, Shader::Stage::Fragment);
// Bind pipeline.
// TODO: Read entire key based on reg state.
graphics_key.prim_type = liverpool->regs.primitive_type;
graphics_key.polygon_mode = liverpool->regs.polygon_control.PolyMode();
pipeline = std::make_unique<GraphicsPipeline>(instance, graphics_key, *pipeline_cache,
*pipeline_layout, stages);
} }
} // namespace Vulkan } // namespace Vulkan

View File

@ -8,6 +8,10 @@
#include "shader_recompiler/object_pool.h" #include "shader_recompiler/object_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
namespace Shader {
struct Info;
}
namespace Vulkan { namespace Vulkan {
class Instance; class Instance;
@ -21,7 +25,12 @@ public:
AmdGpu::Liverpool* liverpool); AmdGpu::Liverpool* liverpool);
~PipelineCache() = default; ~PipelineCache() = default;
void BindPipeline(); const GraphicsPipeline* GetPipeline();
private:
void RefreshKey();
std::unique_ptr<GraphicsPipeline> CreatePipeline();
private: private:
const Instance& instance; const Instance& instance;
@ -31,7 +40,7 @@ private:
vk::UniquePipelineLayout pipeline_layout; vk::UniquePipelineLayout pipeline_layout;
tsl::robin_map<size_t, vk::UniqueShaderModule> module_map; tsl::robin_map<size_t, vk::UniqueShaderModule> module_map;
std::array<vk::ShaderModule, MaxShaderStages> stages{}; std::array<vk::ShaderModule, MaxShaderStages> stages{};
std::unique_ptr<GraphicsPipeline> pipeline; tsl::robin_map<PipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
PipelineKey graphics_key{}; PipelineKey graphics_key{};
Shader::ObjectPool<Shader::IR::Inst> inst_pool; Shader::ObjectPool<Shader::IR::Inst> inst_pool;
Shader::ObjectPool<Shader::IR::Block> block_pool; Shader::ObjectPool<Shader::IR::Block> block_pool;

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h" #include "common/config.h"
#include "core/memory.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_rasterizer.h"
@ -18,24 +19,25 @@ static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_) VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
liverpool{liverpool_}, pipeline_cache{instance, scheduler, liverpool}, liverpool{liverpool_}, memory{Core::Memory::Instance()},
pipeline_cache{instance, scheduler, liverpool},
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} { vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} {
if (!Config::nullGpu()) { if (!Config::nullGpu()) {
liverpool->BindRasterizer(this); liverpool->BindRasterizer(this);
} }
memory->SetInstance(&instance);
} }
Rasterizer::~Rasterizer() = default; Rasterizer::~Rasterizer() = default;
void Rasterizer::DrawIndex() { void Rasterizer::Draw(bool is_indexed) {
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
auto& regs = liverpool->regs; const auto& regs = liverpool->regs;
const u32 num_indices = SetupIndexBuffer(is_indexed);
UpdateDynamicState(); const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline();
pipeline_cache.BindPipeline(); pipeline->BindResources(memory);
auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]);
const vk::RenderingAttachmentInfo color_info = { const vk::RenderingAttachmentInfo color_info = {
.imageView = *image_view.image_view, .imageView = *image_view.image_view,
@ -52,13 +54,50 @@ void Rasterizer::DrawIndex() {
.pColorAttachments = &color_info, .pColorAttachments = &color_info,
}; };
UpdateDynamicState();
cmdbuf.beginRendering(rendering_info); cmdbuf.beginRendering(rendering_info);
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), 0, vk::IndexType::eUint32); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
cmdbuf.bindVertexBuffers(0, vertex_index_buffer.Handle(), vk::DeviceSize(0)); if (is_indexed) {
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0);
}
cmdbuf.endRendering(); cmdbuf.endRendering();
} }
u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) {
// Emulate QuadList primitive type with CPU made index buffer.
const auto& regs = liverpool->regs;
if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) {
ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw");
is_indexed = true;
// Emit indices.
const u32 index_size = 3 * regs.num_indices;
const auto [data, offset, _] = vertex_index_buffer.Map(index_size);
LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices);
vertex_index_buffer.Commit(index_size);
// Bind index buffer.
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16);
return index_size / sizeof(u16);
}
if (!is_indexed) {
return 0;
}
const VAddr index_address = regs.index_base_address.Address();
const auto [buffer, offset] = memory->GetVulkanBuffer(index_address);
const vk::IndexType index_type =
regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? vk::IndexType::eUint16
: vk::IndexType::eUint32;
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(buffer, offset, index_type);
return regs.num_indices;
}
void Rasterizer::UpdateDynamicState() { void Rasterizer::UpdateDynamicState() {
UpdateViewportScissorState(); UpdateViewportScissorState();
} }

View File

@ -3,7 +3,6 @@
#pragma once #pragma once
#include <memory>
#include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h"
@ -11,6 +10,10 @@ namespace AmdGpu {
struct Liverpool; struct Liverpool;
} }
namespace Core {
class MemoryManager;
}
namespace VideoCore { namespace VideoCore {
class TextureCache; class TextureCache;
} }
@ -26,20 +29,14 @@ public:
VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool); VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool);
~Rasterizer(); ~Rasterizer();
/// Performs a draw call with an index buffer. void Draw(bool is_indexed);
void DrawIndex();
/// Performs a draw call without an index buffer.
void DrawAuto();
/// Updates graphics state that is not part of the bound pipeline.
void UpdateDynamicState();
private: private:
/// Updates viewport and scissor from liverpool registers. u32 SetupIndexBuffer(bool& is_indexed);
void UpdateViewportScissorState(); void MapMemory(VAddr addr, size_t size);
/// Updates depth and stencil pipeline state from liverpool registers. void UpdateDynamicState();
void UpdateViewportScissorState();
void UpdateDepthStencilState(); void UpdateDepthStencilState();
private: private:
@ -47,6 +44,7 @@ private:
Scheduler& scheduler; Scheduler& scheduler;
VideoCore::TextureCache& texture_cache; VideoCore::TextureCache& texture_cache;
AmdGpu::Liverpool* liverpool; AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory;
PipelineCache pipeline_cache; PipelineCache pipeline_cache;
StreamBuffer vertex_index_buffer; StreamBuffer vertex_index_buffer;
}; };

View File

@ -35,7 +35,7 @@ public:
* @param size Size to reserve. * @param size Size to reserve.
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset * @returns A pair of a raw memory pointer (with offset added), and the buffer offset
*/ */
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment = 0);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Commit(u64 size); void Commit(u64 size);

View File

@ -67,12 +67,8 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
} }
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept { ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept {
// There is a small difference between T# and CB number types, account for it.
const auto number_fmt =
buffer.info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb
: buffer.info.number_type;
is_tiled = true; is_tiled = true;
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, number_fmt); pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
type = vk::ImageType::e2D; type = vk::ImageType::e2D;
size.width = buffer.Pitch(); size.width = buffer.Pitch();
size.height = buffer.Height(); size.height = buffer.Height();

View File

@ -147,7 +147,8 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff
return slot_image_views[view_id]; return slot_image_views[view_id];
} }
const ImageViewId view_id = slot_image_views.insert(instance, scheduler, view_info, image.image); const ImageViewId view_id =
slot_image_views.insert(instance, scheduler, view_info, image.image);
image.image_view_infos.emplace_back(view_info); image.image_view_infos.emplace_back(view_info);
image.image_view_ids.emplace_back(view_id); image.image_view_ids.emplace_back(view_id);
return slot_image_views[view_id]; return slot_image_views[view_id];