mirror of https://github.com/shadps4-emu/shadPS4.git
Base DMA implementation

This commit is contained in:
parent 94a078207f
commit c077fb97da
@@ -164,7 +164,7 @@ using BufferAlias = EmitContext::BufferAlias;
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
-    const auto& srt_flatbuf = ctx.buffers.back();
+    const auto& srt_flatbuf = ctx.buffers[ctx.flatbuf_index];
     ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 &&
            srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
     const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
@@ -195,7 +195,8 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
 }
 
 Id EmitContext::GetBufferSize(const u32 sharp_idx) {
-    const auto& srt_flatbuf = buffers.back();
+    // Can this be done with memory access? Like we do now with ReadConst
+    const auto& srt_flatbuf = buffers[flatbuf_index];
     ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
     const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
 
@@ -693,6 +694,12 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
     case Shader::BufferType::ReadConstUbo:
         Name(id, "srt_flatbuf_ubo");
         break;
+    case Shader::BufferType::BdaPagetable:
+        Name(id, "bda_pagetable");
+        break;
+    case Shader::BufferType::FaultReadback:
+        Name(id, "fault_readback");
+        break;
     case Shader::BufferType::SharedMemory:
         Name(id, "ssbo_shmem");
         break;
@@ -718,6 +725,15 @@ void EmitContext::DefineBuffers() {
         const auto buf_sharp = desc.GetSharp(info);
         const bool is_storage = desc.IsStorage(buf_sharp, profile);
 
+        // Set indexes for special buffers.
+        if (desc.buffer_type == BufferType::ReadConstUbo) {
+            flatbuf_index = buffers.size();
+        } else if (desc.buffer_type == BufferType::BdaPagetable) {
+            bda_pagetable_index = buffers.size();
+        } else if (desc.buffer_type == BufferType::FaultReadback) {
+            fault_readback_index = buffers.size();
+        }
+
         // Define aliases depending on the shader usage.
         auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
         if (True(desc.used_types & IR::Type::U32)) {
@@ -269,6 +269,10 @@ public:
     boost::container::small_vector<TextureDefinition, 8> images;
     boost::container::small_vector<Id, 4> samplers;
 
+    size_t flatbuf_index{};
+    size_t bda_pagetable_index{};
+    size_t fault_readback_index{};
+
     Id sampler_type{};
     Id sampler_pointer_type{};
 
@@ -39,21 +39,22 @@ void Translator::EmitScalarMemory(const GcnInst& inst) {
 
 void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
     const auto& smrd = inst.control.smrd;
-    const u32 dword_offset = [&] -> u32 {
+    const IR::ScalarReg sbase{inst.src[0].code * 2};
+    const IR::U32 dword_offset = [&] -> IR::U32 {
         if (smrd.imm) {
-            return smrd.offset;
+            return ir.Imm32(smrd.offset);
         }
         if (smrd.offset == SQ_SRC_LITERAL) {
-            return inst.src[1].code;
+            return ir.Imm32(inst.src[1].code);
         }
-        UNREACHABLE();
+        return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
     }();
-    const IR::ScalarReg sbase{inst.src[0].code * 2};
     const IR::Value base =
         ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
     IR::ScalarReg dst_reg{inst.dst[0].code};
     for (u32 i = 0; i < num_dwords; i++) {
-        ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
+        IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
+        ir.SetScalarReg(dst_reg++, ir.ReadConst(base, index));
     }
 }
 
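Behavioral note on the hunk above: previously an SMRD offset that was neither an inline immediate nor a literal hit UNREACHABLE(); now the offset is read from the SGPR selected by smrd.offset, which holds a byte offset, and ShiftRightLogical by 2 converts it to a dword index. Because the offset can now be a runtime value, the per-dword index is built with IAdd instead of being folded into an immediate. A minimal standalone sketch of that arithmetic, with made-up values (not part of the commit):

// Standalone sketch (illustrative values, not from the commit): how an
// SGPR-held byte offset becomes per-dword ReadConst indices, mirroring the
// ShiftRightLogical / IAdd chain the translator now emits.
#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t sgpr_byte_offset = 0x44;              // assumed runtime SGPR value
    const std::uint32_t dword_offset = sgpr_byte_offset >> 2; // ShiftRightLogical(offset, 2)
    for (std::uint32_t i = 0; i < 4; ++i) {                   // e.g. an S_LOAD_DWORDX4
        const std::uint32_t index = dword_offset + i;         // IAdd(dword_offset, Imm32(i))
        std::printf("dst[%u] = ReadConst(base, %u)\n", i, index);
    }
    return 0;
}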
@@ -42,6 +42,8 @@ constexpr u32 NUM_TEXTURE_TYPES = 7;
 enum class BufferType : u32 {
     Guest,
     ReadConstUbo,
+    BdaPagetable,
+    FaultReadback,
     GdsBuffer,
     SharedMemory,
 };
@@ -209,6 +211,7 @@ struct Info {
     bool stores_tess_level_inner{};
     bool translation_failed{};
     bool has_readconst{};
+    bool uses_dma{};
     u8 mrt_mask{0u};
     bool has_fetch_shader{false};
     u32 fetch_shader_sgpr_base{0u};
@@ -87,6 +87,21 @@ void Visit(Info& info, const IR::Inst& inst) {
             });
             info.has_readconst = true;
         }
+        if (!info.uses_dma) {
+            // For now, we only need U32, but we may need
+            // to add more types in the future for other purposes.
+            info.buffers.push_back({
+                .used_types = IR::Type::U32,
+                .inline_cbuf = AmdGpu::Buffer::Null(),
+                .buffer_type = BufferType::BdaPagetable,
+            });
+            info.buffers.push_back({
+                .used_types = IR::Type::U32,
+                .inline_cbuf = AmdGpu::Buffer::Null(),
+                .buffer_type = BufferType::FaultReadback,
+            });
+            info.uses_dma = true;
+        }
         break;
     case IR::Opcode::PackUfloat10_11_11:
         info.uses_pack_10_11_11 = true;
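The pass above makes any shader that performs ReadConst also declare two synthetic buffers and sets uses_dma: a BDA (buffer device address) page table and a fault readback buffer. The apparent intent is that the shader can translate a guest address into a device address through the page table and, when a page is not yet resident, record a fault for the host to service. The following is a conceptual host-side sketch of such a lookup, not the emitted SPIR-V; the page size, the entry layout (0 meaning "not resident"), and all names are assumptions:

// Conceptual sketch only, not the emitted shader code.
#include <cstdint>
#include <vector>

constexpr std::uint64_t kPageBits = 12; // assumed 4 KiB pages

struct DmaTables {
    std::vector<std::uint64_t> bda_pagetable; // guest page -> device address, 0 if absent
    std::vector<std::uint8_t> fault_readback; // one flag per guest page
};

// Returns the device address backing guest_addr, or 0 after recording a fault
// for the host to service before the next submission.
std::uint64_t TranslateOrFault(DmaTables& tables, std::uint64_t guest_addr) {
    const std::uint64_t page = guest_addr >> kPageBits;
    if (tables.bda_pagetable[page] == 0) {
        tables.fault_readback[page] = 1; // host reads this back and caches the page
        return 0;
    }
    return tables.bda_pagetable[page] + (guest_addr & ((1ull << kPageBits) - 1));
}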
@@ -379,10 +379,7 @@ void BufferCache::ImportMemory(u64 start, u64 end) {
     }
     WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(vk::DeviceAddress), bda_addrs.data(),
                     bda_addrs.size() * sizeof(vk::DeviceAddress));
-    {
-        std::scoped_lock lk{mutex};
-        imported_buffers.emplace_back(std::move(buffer));
-    }
+    imported_buffers.emplace_back(std::move(buffer));
     // Mark the pages as covered
     imported_regions += range;
 }
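In the hunk above, WriteDataBuffer stores bda_addrs into the page-table buffer at byte offset range_start * sizeof(vk::DeviceAddress), i.e. one vk::DeviceAddress entry per guest page starting at page index range_start; the scoped_lock around imported_buffers is dropped, presumably because the caller already serializes access. A sketch of how the per-page entries for a contiguously mapped import could be produced, assuming 4 KiB page granularity and hypothetical names:

// Illustrative sketch; kPageSize and BuildBdaAddrs are assumptions, not the
// project's real helpers.
#include <cstdint>
#include <vector>

constexpr std::uint64_t kPageSize = 4096; // assumed page granularity

std::vector<std::uint64_t> BuildBdaAddrs(std::uint64_t device_base_addr,
                                         std::uint64_t range_pages) {
    std::vector<std::uint64_t> bda_addrs(range_pages);
    for (std::uint64_t i = 0; i < range_pages; ++i) {
        // Page i of the imported range resolves to the mapping base plus i pages.
        bda_addrs[i] = device_base_addr + i * kPageSize;
    }
    return bda_addrs;
}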
@@ -850,7 +847,7 @@ void BufferCache::WriteDataBuffer(Buffer& buffer, VAddr address, const void* val
     vk::Buffer src_buffer = staging_buffer.Handle();
     if (num_bytes < StagingBufferSize) {
         const auto [staging, offset] = staging_buffer.Map(num_bytes);
-        std::memcpy(staging + offset, value, num_bytes);
+        std::memcpy(staging, value, num_bytes);
         copy.srcOffset = offset;
         staging_buffer.Commit();
     } else {
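The memcpy change above reads as an offset double-count fix: assuming staging_buffer.Map(num_bytes) returns a CPU pointer that already points at the reserved window whose GPU-visible start is offset, adding offset again wrote past that window while copy.srcOffset still referenced its start. A self-contained sketch of that assumed contract (the StagingBuffer here is hypothetical and only models the call site, not the project's class):

// Sketch of the assumed staging contract; not the project's StagingBuffer.
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>

struct StagingBuffer {
    std::vector<std::uint8_t> memory = std::vector<std::uint8_t>(1 << 20);
    std::uint64_t cursor = 0;
    // Returns a CPU pointer already advanced to the reserved window, plus the
    // GPU-visible offset of that window; callers must not add the offset again.
    std::pair<std::uint8_t*, std::uint64_t> Map(std::uint64_t num_bytes) {
        const std::uint64_t offset = cursor;
        cursor += num_bytes;
        return {memory.data() + offset, offset};
    }
};

void Upload(StagingBuffer& staging_buffer, const void* value, std::uint64_t num_bytes) {
    const auto [staging, offset] = staging_buffer.Map(num_bytes);
    std::memcpy(staging, value, num_bytes); // fixed form: no "+ offset"
    // A GPU copy would still use `offset` as its source offset, matching
    // copy.srcOffset = offset in the hunk above.
    (void)offset;
}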
@@ -79,13 +79,13 @@ public:
     }
 
     /// Retrieves the device local BDA page table buffer.
-    [[nodiscard]] Buffer& GetBdaPageTableBuffer() noexcept {
-        return bda_pagetable_buffer;
+    [[nodiscard]] Buffer* GetBdaPageTableBuffer() noexcept {
+        return &bda_pagetable_buffer;
     }
 
     /// Retrieves the fault readback buffer.
-    [[nodiscard]] Buffer& GetFaultReadbackBuffer() noexcept {
-        return fault_readback_buffer;
+    [[nodiscard]] Buffer* GetFaultReadbackBuffer() noexcept {
+        return &fault_readback_buffer;
     }
 
     /// Retrieves the buffer with the specified id.
@@ -459,9 +459,24 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         stage->PushUd(binding, push_data);
         BindBuffers(*stage, binding, push_data);
         BindTextures(*stage, binding);
+
+        dma_enabled |= stage->uses_dma;
     }
 
     pipeline->BindResources(set_writes, buffer_barriers, push_data);
 
+    if (dma_enabled) {
+        // First, import any queued host memory, then sync every mapped
+        // region that is cached on GPU memory.
+        buffer_cache.ImportQueuedRegions();
+        {
+            std::shared_lock lock(mapped_ranges_mutex);
+            for (const auto& range : mapped_ranges) {
+                buffer_cache.SynchronizeRange(range.lower(), range.upper() - range.lower());
+            }
+        }
+    }
+
     return true;
 }
 
@@ -526,6 +541,12 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
             const u64 offset = vk_buffer.Copy(stage.flattened_ud_buf.data(), ubo_size,
                                               instance.UniformMinAlignment());
             buffer_infos.emplace_back(vk_buffer.Handle(), offset, ubo_size);
+        } else if (desc.buffer_type == Shader::BufferType::BdaPagetable) {
+            const auto* bda_buffer = buffer_cache.GetBdaPageTableBuffer();
+            buffer_infos.emplace_back(bda_buffer->Handle(), 0, bda_buffer->SizeBytes());
+        } else if (desc.buffer_type == Shader::BufferType::FaultReadback) {
+            const auto* fault_buffer = buffer_cache.GetFaultReadbackBuffer();
+            buffer_infos.emplace_back(fault_buffer->Handle(), 0, fault_buffer->SizeBytes());
         } else if (desc.buffer_type == Shader::BufferType::SharedMemory) {
             auto& lds_buffer = buffer_cache.GetStreamBuffer();
             const auto& cs_program = liverpool->GetCsRegs();
@@ -95,6 +95,13 @@ private:
             texture_cache.GetImage(image_id).binding.Reset();
         }
         bound_images.clear();
+
+        if (dma_enabled) {
+            dma_enabled = false;
+            // If a shader accesses a buffer that is not cached, we need to
+            // cache it.
+            buffer_cache.CreateFaultBuffers();
+        }
     }
 
     bool IsComputeMetaClear(const Pipeline* pipeline);
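Per the comment in the hunk above, when a submission used DMA the rasterizer clears dma_enabled and calls buffer_cache.CreateFaultBuffers() so that pages a shader touched without a cached buffer get cached for later submissions. A conceptual host-side sketch of what such fault processing could do; apart from the quoted comment and the CreateFaultBuffers name, the flag layout and names here are assumptions:

// Conceptual sketch of post-draw fault processing; not the project's code.
#include <cstdint>
#include <functional>
#include <vector>

constexpr std::uint64_t kPageBits = 12; // assumed page granularity

void ProcessFaults(const std::vector<std::uint8_t>& fault_flags,
                   const std::function<void(std::uint64_t)>& cache_page) {
    for (std::uint64_t page = 0; page < fault_flags.size(); ++page) {
        if (fault_flags[page] != 0) {
            // A shader touched an uncached page: create/cache a buffer covering
            // it so the next submission finds it in the BDA page table.
            cache_page(page << kPageBits);
        }
    }
}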
@@ -126,6 +133,7 @@ private:
     boost::container::static_vector<BufferBindingInfo, Shader::NumBuffers> buffer_bindings;
     using ImageBindingInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::TextureDesc>;
     boost::container::static_vector<ImageBindingInfo, Shader::NumImages> image_bindings;
+    bool dma_enabled{false};
 };
 
 } // namespace Vulkan