mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-28 04:54:30 +00:00
64K pages and fix memory mapping
This commit is contained in:
parent
52253b45fb
commit
d5e45fb492
@ -180,6 +180,9 @@ ImportedHostBuffer::ImportedHostBuffer(const Vulkan::Instance& instance_,
|
|||||||
auto device_memory_result = instance->GetDevice().allocateMemory(alloc_ci);
|
auto device_memory_result = instance->GetDevice().allocateMemory(alloc_ci);
|
||||||
if (device_memory_result.result != vk::Result::eSuccess) {
|
if (device_memory_result.result != vk::Result::eSuccess) {
|
||||||
// May fail to import the host memory if it is backed by a file. (AMD on Linux)
|
// May fail to import the host memory if it is backed by a file. (AMD on Linux)
|
||||||
|
LOG_WARNING(Render_Vulkan,
|
||||||
|
"Failed to import host memory at {} size {:#x}, Reason: {}",
|
||||||
|
cpu_addr, size_bytes, vk::to_string(device_memory_result.result));
|
||||||
instance->GetDevice().destroyBuffer(buffer);
|
instance->GetDevice().destroyBuffer(buffer);
|
||||||
has_failed = true;
|
has_failed = true;
|
||||||
return;
|
return;
|
||||||
|
@ -19,9 +19,9 @@ static constexpr size_t StagingBufferSize = 512_MB;
|
|||||||
static constexpr size_t UboStreamBufferSize = 128_MB;
|
static constexpr size_t UboStreamBufferSize = 128_MB;
|
||||||
|
|
||||||
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||||
AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
|
Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool_,
|
||||||
PageManager& tracker_)
|
TextureCache& texture_cache_, PageManager& tracker_)
|
||||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
|
: instance{instance_}, scheduler{scheduler_}, rasterizer{rasterizer_}, liverpool{liverpool_},
|
||||||
texture_cache{texture_cache_}, tracker{tracker_},
|
texture_cache{texture_cache_}, tracker{tracker_},
|
||||||
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
|
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
|
||||||
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
|
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
|
||||||
@ -324,31 +324,38 @@ BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void BufferCache::MapMemory(VAddr device_addr, u64 size) {
|
void BufferCache::MapMemory(VAddr device_addr, u64 size) {
|
||||||
const u64 page_start = device_addr >> BDA_PAGEBITS;
|
const u64 page_start = device_addr >> CACHING_PAGEBITS;
|
||||||
const u64 page_end = Common::DivCeil(device_addr + size, BDA_PAGESIZE);
|
const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE);
|
||||||
|
auto interval = decltype(covered_regions)::interval_type::right_open(page_start, page_end);
|
||||||
|
auto interval_set = boost::icl::interval_set<u64>{interval};
|
||||||
|
auto uncovered_ranges = interval_set - covered_regions;
|
||||||
|
if (uncovered_ranges.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
// We fill any holes within the given range
|
// We fill any holes within the given range
|
||||||
boost::container::small_vector<u64, 1024> bda_addrs;
|
boost::container::small_vector<u64, 1024> bda_addrs;
|
||||||
bool importing_failed = false;
|
for (const auto& range : uncovered_ranges) {
|
||||||
u64 range_start = page_start;
|
// import host memory
|
||||||
u64 range_end = page_start;
|
const u64 range_start = range.lower();
|
||||||
const auto import_range = [&]() {
|
const u64 range_end = range.upper();
|
||||||
// Import the host memory
|
void* cpu_addr = reinterpret_cast<void*>(range_start << CACHING_PAGEBITS);
|
||||||
void* cpu_addr = reinterpret_cast<void*>(range_start << BDA_PAGEBITS);
|
const u64 range_size = (range_end - range_start) << CACHING_PAGEBITS;
|
||||||
const u64 range_size = (range_end - range_start) << BDA_PAGEBITS;
|
ImportedHostBuffer buffer(instance, scheduler, cpu_addr, range_size,
|
||||||
ImportedHostBuffer buffer(instance, scheduler, cpu_addr, range_size, vk::BufferUsageFlagBits::eShaderDeviceAddress | vk::BufferUsageFlagBits::eStorageBuffer);
|
vk::BufferUsageFlagBits::eShaderDeviceAddress |
|
||||||
|
vk::BufferUsageFlagBits::eStorageBuffer);
|
||||||
if (buffer.HasFailed()) {
|
if (buffer.HasFailed()) {
|
||||||
importing_failed = true;
|
continue;
|
||||||
}
|
}
|
||||||
// Update BDA page table
|
// Update BDA page table
|
||||||
u64 bda_addr = buffer.BufferDeviceAddress();
|
const u64 bda_addr = buffer.BufferDeviceAddress();
|
||||||
u64 range = range_end - range_start;
|
const u64 range_pages = range_end - range_start;
|
||||||
bda_addrs.clear();
|
bda_addrs.clear();
|
||||||
bda_addrs.reserve(range);
|
bda_addrs.reserve(range_pages);
|
||||||
for (u64 i = 0; i < range; ++i) {
|
for (u64 i = 0; i < range_pages; ++i) {
|
||||||
// TODO: we may want to mark the page as host imported
|
// TODO: we may want to mark the page as host imported
|
||||||
// to let the shader know so that it can notify us if it
|
// to let the shader know so that it can notify us if it
|
||||||
// accesses the page, so we can create a GPU local buffer.
|
// accesses the page, so we can create a GPU local buffer.
|
||||||
bda_addrs.push_back(bda_addr + (i << BDA_PAGEBITS));
|
bda_addrs.push_back((bda_addr + (i << CACHING_PAGEBITS)) | 0x1);
|
||||||
}
|
}
|
||||||
WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(u64), bda_addrs.data(),
|
WriteDataBuffer(bda_pagetable_buffer, range_start * sizeof(u64), bda_addrs.data(),
|
||||||
bda_addrs.size() * sizeof(u64));
|
bda_addrs.size() * sizeof(u64));
|
||||||
@ -356,34 +363,9 @@ void BufferCache::MapMemory(VAddr device_addr, u64 size) {
|
|||||||
std::scoped_lock lk{mutex};
|
std::scoped_lock lk{mutex};
|
||||||
imported_buffers.emplace_back(std::move(buffer));
|
imported_buffers.emplace_back(std::move(buffer));
|
||||||
}
|
}
|
||||||
};
|
// Mark the pages as covered
|
||||||
for (; range_end < page_end; ++range_end) {
|
covered_regions += range;
|
||||||
if (!bda_mapped_pages.test(range_end)) {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
if (range_start != range_end) {
|
|
||||||
import_range();
|
|
||||||
if (importing_failed) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
range_start = range_end + 1;
|
|
||||||
}
|
|
||||||
if (!importing_failed && range_start != range_end) {
|
|
||||||
import_range();
|
|
||||||
}
|
|
||||||
// Mark the pages as mapped
|
|
||||||
for (u64 page = page_start; page < page_end; ++page) {
|
|
||||||
bda_mapped_pages.set(page);
|
|
||||||
}
|
|
||||||
if (!importing_failed) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// If we failed to import the memory, fall back to copying the whole map
|
|
||||||
// to GPU memory.
|
|
||||||
LOG_INFO(Render_Vulkan, "Failed to import host memory at {:#x} size {:#x}, falling back to copying",
|
|
||||||
device_addr, size);
|
|
||||||
CreateBuffer(device_addr, size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
BufferCache::OverlapResult BufferCache::ResolveOverlaps(VAddr device_addr, u32 wanted_size) {
|
BufferCache::OverlapResult BufferCache::ResolveOverlaps(VAddr device_addr, u32 wanted_size) {
|
||||||
|
@ -39,14 +39,11 @@ class TextureCache;
|
|||||||
|
|
||||||
class BufferCache {
|
class BufferCache {
|
||||||
public:
|
public:
|
||||||
static constexpr u32 CACHING_PAGEBITS = 12;
|
static constexpr u32 CACHING_PAGEBITS = 16;
|
||||||
static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
|
static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
|
||||||
static constexpr u64 DEVICE_PAGESIZE = 4_KB;
|
static constexpr u64 DEVICE_PAGESIZE = 64_KB;
|
||||||
|
static constexpr u64 CACHING_NUMPAGES = u64{1} << (40 - CACHING_PAGEBITS);
|
||||||
static constexpr u64 BDA_PAGEBITS = 16;
|
static constexpr u64 BDA_PAGETABLE_SIZE = CACHING_NUMPAGES * sizeof(u64);
|
||||||
static constexpr u64 BDA_PAGESIZE = u64{1} << BDA_PAGEBITS;
|
|
||||||
static constexpr u64 BDA_NUMPAGES = (u64{1} << (u64(40) - BDA_PAGEBITS));
|
|
||||||
static constexpr u64 BDA_PAGETABLE_SIZE = BDA_NUMPAGES * sizeof(u64);
|
|
||||||
|
|
||||||
struct Traits {
|
struct Traits {
|
||||||
using Entry = BufferId;
|
using Entry = BufferId;
|
||||||
@ -65,8 +62,8 @@ public:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
|
||||||
AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
|
Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool,
|
||||||
PageManager& tracker);
|
TextureCache& texture_cache, PageManager& tracker);
|
||||||
~BufferCache();
|
~BufferCache();
|
||||||
|
|
||||||
/// Returns a pointer to GDS device local buffer.
|
/// Returns a pointer to GDS device local buffer.
|
||||||
@ -124,8 +121,6 @@ public:
|
|||||||
|
|
||||||
[[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);
|
[[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);
|
||||||
|
|
||||||
void MapMemory(VAddr device_addr, u64 size);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
|
void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
|
||||||
@ -169,8 +164,11 @@ private:
|
|||||||
|
|
||||||
void DeleteBuffer(BufferId buffer_id);
|
void DeleteBuffer(BufferId buffer_id);
|
||||||
|
|
||||||
|
void MapMemory(VAddr device_addr, u64 size);
|
||||||
|
|
||||||
const Vulkan::Instance& instance;
|
const Vulkan::Instance& instance;
|
||||||
Vulkan::Scheduler& scheduler;
|
Vulkan::Scheduler& scheduler;
|
||||||
|
Vulkan::Rasterizer& rasterizer;
|
||||||
AmdGpu::Liverpool* liverpool;
|
AmdGpu::Liverpool* liverpool;
|
||||||
TextureCache& texture_cache;
|
TextureCache& texture_cache;
|
||||||
PageManager& tracker;
|
PageManager& tracker;
|
||||||
@ -178,7 +176,7 @@ private:
|
|||||||
StreamBuffer stream_buffer;
|
StreamBuffer stream_buffer;
|
||||||
Buffer gds_buffer;
|
Buffer gds_buffer;
|
||||||
Buffer bda_pagetable_buffer;
|
Buffer bda_pagetable_buffer;
|
||||||
std::bitset<BDA_NUMPAGES> bda_mapped_pages;
|
boost::icl::interval_set<u64> covered_regions;
|
||||||
std::vector<ImportedHostBuffer> imported_buffers;
|
std::vector<ImportedHostBuffer> imported_buffers;
|
||||||
std::shared_mutex mutex;
|
std::shared_mutex mutex;
|
||||||
Common::SlotVector<Buffer> slot_buffers;
|
Common::SlotVector<Buffer> slot_buffers;
|
||||||
|
@ -36,7 +36,7 @@ static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
|
|||||||
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
||||||
AmdGpu::Liverpool* liverpool_)
|
AmdGpu::Liverpool* liverpool_)
|
||||||
: instance{instance_}, scheduler{scheduler_}, page_manager{this},
|
: instance{instance_}, scheduler{scheduler_}, page_manager{this},
|
||||||
buffer_cache{instance, scheduler, liverpool_, texture_cache, page_manager},
|
buffer_cache{instance, scheduler, *this, liverpool_, texture_cache, page_manager},
|
||||||
texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
|
texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
|
||||||
memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
|
memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
|
||||||
if (!Config::nullGpu()) {
|
if (!Config::nullGpu()) {
|
||||||
@ -946,7 +946,6 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) {
|
|||||||
mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
|
mapped_ranges += decltype(mapped_ranges)::interval_type::right_open(addr, addr + size);
|
||||||
}
|
}
|
||||||
page_manager.OnGpuMap(addr, size);
|
page_manager.OnGpuMap(addr, size);
|
||||||
buffer_cache.MapMemory(addr, size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
|
void Rasterizer::UnmapMemory(VAddr addr, u64 size) {
|
||||||
|
Loading…
Reference in New Issue
Block a user