Mirror of https://github.com/shadps4-emu/shadPS4.git
shader_recompiler: Complete buffer aliasing support
* Add more alias types to buffers: F32 for float reads/writes, and 8/16-bit integer types for formatted buffers.
Commit b0dd81a2b9 (parent 7423ccfe88)
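For readers unfamiliar with the recompiler, "buffer aliasing" here means a single guest buffer binding is exposed to the generated SPIR-V several times, once per element type, so typed accesses need no per-access bitcasts or bit-field packing. The sketch below is illustrative only: the names `BufferAlias`, `BufferSpv`, `BufferDefinition`, and the `operator[]` shape are assumptions inferred from how the diff uses them, not the repository's exact declarations.

```cpp
// Illustrative sketch, not the repository's exact declarations.
// Id is Sirit::Id; u32 is the project's 32-bit unsigned alias.
#include <array>

enum class BufferAlias : u32 {
    U8,
    U16,
    U32,
    F32,
    NumAliases,
};

struct BufferSpv {
    Id id;           // OpVariable for this typed view of the buffer
    Id pointer_type; // pointer-to-element type used with OpAccessChain
};

struct BufferDefinition {
    u32 binding;
    Id offset; // optional dynamic offset; invalid Id when unused
    std::array<BufferSpv, static_cast<size_t>(BufferAlias::NumAliases)> aliases;

    BufferSpv& operator[](BufferAlias alias) {
        return aliases[static_cast<size_t>(alias)];
    }
    const BufferSpv& operator[](BufferAlias alias) const {
        return aliases[static_cast<size_t>(alias)];
    }
};
```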
```diff
@@ -242,14 +242,17 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) {
     ctx.AddCapability(spv::Capability::Image1D);
     ctx.AddCapability(spv::Capability::Sampled1D);
     ctx.AddCapability(spv::Capability::ImageQuery);
+    ctx.AddCapability(spv::Capability::Int8);
+    ctx.AddCapability(spv::Capability::Int16);
+    ctx.AddCapability(spv::Capability::Int64);
+    ctx.AddCapability(spv::Capability::UniformAndStorageBuffer8BitAccess);
+    ctx.AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
     if (info.uses_fp16) {
         ctx.AddCapability(spv::Capability::Float16);
-        ctx.AddCapability(spv::Capability::Int16);
     }
     if (info.uses_fp64) {
         ctx.AddCapability(spv::Capability::Float64);
     }
-    ctx.AddCapability(spv::Capability::Int64);
     if (info.has_storage_images) {
         ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
         ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
```
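These capabilities are only valid if the device enables the matching Vulkan features; the vk_instance.cpp hunks at the end of this commit wire up the 8-bit and 16-bit storage access features and shaderInt8, while the integer shader features were already enabled. A rough capability-to-feature correspondence (the identifiers are the real SPIR-V/Vulkan names, but the table itself is illustrative and not part of the commit):

```cpp
// Illustrative mapping between the SPIR-V capabilities declared above and the
// Vulkan device features that must back them. Not code from the repository.
struct CapabilityFeaturePair {
    const char* capability;
    const char* feature;
};

constexpr CapabilityFeaturePair kBufferAliasRequirements[] = {
    {"Int8", "VkPhysicalDeviceVulkan12Features::shaderInt8"},
    {"Int16", "VkPhysicalDeviceFeatures::shaderInt16"},
    {"Int64", "VkPhysicalDeviceFeatures::shaderInt64"},
    {"UniformAndStorageBuffer8BitAccess",
     "VkPhysicalDeviceVulkan12Features::uniformAndStorageBuffer8BitAccess"},
    {"UniformAndStorageBuffer16BitAccess",
     "VkPhysicalDeviceVulkan11Features::uniformAndStorageBuffer16BitAccess"},
};
```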
```diff
@@ -398,83 +398,96 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
     ctx.OpStore(pointer, value);
 }
 
-template <u32 N>
-static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
-    const auto& buffer = ctx.buffers[handle];
-    address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
-    const auto [id, pointer_type] = buffer[BufferAlias::U32];
+template <u32 N, BufferAlias alias>
+static Id EmitLoadBufferB32xN(EmitContext& ctx, u32 handle, Id address) {
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
+    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
+    const auto [id, pointer_type] = spv_buffer[alias];
     if constexpr (N == 1) {
         const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-        return ctx.OpLoad(ctx.U32[1], ptr);
+        return ctx.OpLoad(data_types[1], ptr);
     } else {
         boost::container::static_vector<Id, N> ids;
         for (u32 i = 0; i < N; i++) {
             const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
             const Id ptr{
                 ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i)};
-            ids.push_back(ctx.OpLoad(ctx.U32[1], ptr));
+            ids.push_back(ctx.OpLoad(data_types[1], ptr));
         }
-        return ctx.OpCompositeConstruct(ctx.U32[N], ids);
+        return ctx.OpCompositeConstruct(data_types[N], ids);
     }
 }
 
 Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(3u))};
-    const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
-    const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
-    return ctx.OpBitFieldUExtract(ctx.U32[1], dword, bit_offset, ctx.ConstU32(8u));
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
+    return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr));
 }
 
 Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(2u))};
-    const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
-    const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
-    return ctx.OpBitFieldUExtract(ctx.U32[1], dword, bit_offset, ctx.ConstU32(16u));
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
+    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
+    return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr));
 }
 
 Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferU32xN<1>(ctx, handle, address);
+    return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferU32xN<2>(ctx, handle, address);
+    return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferU32xN<3>(ctx, handle, address);
+    return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferU32xN<4>(ctx, handle, address);
+    return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return ctx.OpBitcast(ctx.F32[1], EmitLoadBufferU32(ctx, inst, handle, address));
+    return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return ctx.OpBitcast(ctx.F32[2], EmitLoadBufferU32x2(ctx, inst, handle, address));
+    return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return ctx.OpBitcast(ctx.F32[3], EmitLoadBufferU32x3(ctx, inst, handle, address));
+    return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return ctx.OpBitcast(ctx.F32[4], EmitLoadBufferU32x4(ctx, inst, handle, address));
+    return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, handle, address);
 }
 
 Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     UNREACHABLE_MSG("SPIR-V instruction");
 }
 
-template <u32 N>
-static void EmitStoreBufferU32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
-    auto& buffer = ctx.buffers[handle];
-    address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
-    const auto [id, pointer_type] = buffer[BufferAlias::U32];
+template <u32 N, BufferAlias alias>
+static void EmitStoreBufferB32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
+    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
+    const auto [id, pointer_type] = spv_buffer[alias];
     if constexpr (N == 1) {
         const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
         ctx.OpStore(ptr, value);
```
```diff
@@ -482,58 +495,63 @@ static void EmitStoreBufferU32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
         for (u32 i = 0; i < N; i++) {
             const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
             const Id ptr =
-                ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
-            ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.U32[1], value, i));
+                ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
+            ctx.OpStore(ptr, ctx.OpCompositeExtract(data_types[1], value, i));
         }
     }
 }
 
 void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(3u))};
-    const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
-    const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
-    const Id new_val{ctx.OpBitFieldInsert(ctx.U32[1], dword, value, bit_offset, ctx.ConstU32(8u))};
-    EmitStoreBufferU32xN<1>(ctx, handle, address, new_val);
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
+    ctx.OpStore(ptr, ctx.OpUConvert(ctx.U8, value));
 }
 
 void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(2u))};
-    const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
-    const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
-    const Id new_val{ctx.OpBitFieldInsert(ctx.U32[1], dword, value, bit_offset, ctx.ConstU32(16u))};
-    EmitStoreBufferU32xN<1>(ctx, handle, address, new_val);
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
+    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
+    ctx.OpStore(ptr, ctx.OpUConvert(ctx.U16, value));
 }
 
 void EmitStoreBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32xN<1>(ctx, handle, address, value);
+    EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32xN<2>(ctx, handle, address, value);
+    EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32xN<3>(ctx, handle, address, value);
+    EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32xN<4>(ctx, handle, address, value);
+    EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[1], value));
+    EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32x2(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[2], value));
+    EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32x3(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[3], value));
+    EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferU32x4(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[4], value));
+    EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, handle, address, value);
 }
 
 void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
```
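The pattern repeated throughout these two hunks is a byte address being turned into an element index for the typed alias: the 32-bit aliases shift the byte address right by 2, the 16-bit alias by 1, and the 8-bit alias indexes bytes directly, which is why the old read-modify-write bit-field code for U8/U16 stores could be deleted. Note that the typed path appears to assume the guest access is aligned to its element size (the shift discards the low bits). A standalone sketch of the index math, independent of the emitter:

```cpp
#include <cassert>
#include <cstdint>

// Byte address -> element index for each alias width. Mirrors the shifts used
// by the emitter above (>> 2 for 32-bit, >> 1 for 16-bit, none for 8-bit).
constexpr std::uint32_t DwordIndex(std::uint32_t byte_address) {
    return byte_address >> 2; // 4-byte elements (U32/F32 aliases)
}
constexpr std::uint32_t WordIndex(std::uint32_t byte_address) {
    return byte_address >> 1; // 2-byte elements (U16 alias)
}
constexpr std::uint32_t ByteIndex(std::uint32_t byte_address) {
    return byte_address; // 1-byte elements (U8 alias)
}

int main() {
    // Example: byte offset 10 lands in dword 2, 16-bit element 5, byte 10.
    assert(DwordIndex(10) == 2);
    assert(WordIndex(10) == 5);
    assert(ByteIndex(10) == 10);
    return 0;
}
```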
```diff
@@ -107,6 +107,8 @@ Id EmitContext::Def(const IR::Value& value) {
 void EmitContext::DefineArithmeticTypes() {
     void_id = Name(TypeVoid(), "void_id");
     U1[1] = Name(TypeBool(), "bool_id");
+    U8 = Name(TypeUInt(8), "u8_id");
+    U16 = Name(TypeUInt(16), "u16_id");
     if (info.uses_fp16) {
         F16[1] = Name(TypeFloat(16), "f16_id");
         U16 = Name(TypeUInt(16), "u16_id");
```
```diff
@@ -638,8 +640,21 @@ void EmitContext::DefineBuffers() {
     for (const auto& desc : info.buffers) {
         const auto buf_sharp = desc.GetSharp(info);
         const bool is_storage = desc.IsStorage(buf_sharp, profile);
+
+        // Define aliases depending on the shader usage.
         auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
-        spv_buffer[BufferAlias::U32] = DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
+        if (True(desc.used_types & IR::Type::U32)) {
+            spv_buffer[BufferAlias::U32] = DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
+        }
+        if (True(desc.used_types & IR::Type::F32)) {
+            spv_buffer[BufferAlias::F32] = DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
+        }
+        if (True(desc.used_types & IR::Type::U16)) {
+            spv_buffer[BufferAlias::U16] = DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
+        }
+        if (True(desc.used_types & IR::Type::U8)) {
+            spv_buffer[BufferAlias::U8] = DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
+        }
         ++binding.unified;
     }
 }
```
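The integer argument to DefineBuffer (2, 2, 1, 0 above) is not named in this hunk; from the element types passed alongside it, it reads as the log2 of the element size in bytes, i.e. the array-stride exponent for the alias. That interpretation is an assumption based on this diff only; a tiny sketch of the relationship:

```cpp
#include <cstdint>

// Assumed meaning of DefineBuffer's integer argument: log2(element size).
// U32/F32 -> 2 (stride 4), U16 -> 1 (stride 2), U8 -> 0 (stride 1).
constexpr std::uint32_t StrideFromShift(std::uint32_t elem_size_shift) {
    return 1u << elem_size_shift;
}

static_assert(StrideFromShift(2) == 4); // U32 / F32 alias
static_assert(StrideFromShift(1) == 2); // U16 alias
static_assert(StrideFromShift(0) == 1); // U8 alias
```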
```diff
@@ -78,7 +78,20 @@ bool IsDataRingInstruction(const IR::Inst& inst) {
 }
 
 IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
-    return IR::Type::U32;
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::LoadBufferU8:
+    case IR::Opcode::StoreBufferU8:
+        return IR::Type::U8;
+    case IR::Opcode::LoadBufferU16:
+    case IR::Opcode::StoreBufferU16:
+        return IR::Type::U16;
+    case IR::Opcode::LoadBufferFormatF32:
+    case IR::Opcode::StoreBufferFormatF32:
+        // Formatted buffer loads can use a variety of types.
+        return IR::Type::U32 | IR::Type::F32 | IR::Type::U16 | IR::Type::U8;
+    default:
+        return IR::Type::U32;
+    }
 }
 
 bool IsImageAtomicInstruction(const IR::Inst& inst) {
```
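BufferDataType's result presumably feeds the per-buffer `used_types` mask that DefineBuffers checks above; the collection pass itself is not part of this diff. The `True(desc.used_types & IR::Type::U32)` checks imply IR::Type is a bit-flags enum that each load/store ORs into. A self-contained illustration of that flags pattern, with local stand-in names rather than the recompiler's real types:

```cpp
#include <cstdint>
#include <initializer_list>

// Local stand-in for the recompiler's flags-style IR::Type.
enum class Type : std::uint32_t {
    U8 = 1u << 0,
    U16 = 1u << 1,
    U32 = 1u << 2,
    F32 = 1u << 3,
};

constexpr Type operator|(Type a, Type b) {
    return static_cast<Type>(static_cast<std::uint32_t>(a) | static_cast<std::uint32_t>(b));
}
constexpr Type operator&(Type a, Type b) {
    return static_cast<Type>(static_cast<std::uint32_t>(a) & static_cast<std::uint32_t>(b));
}
constexpr bool True(Type t) {
    return static_cast<std::uint32_t>(t) != 0;
}

// used_types starts empty and each buffer access ORs in the type reported by
// BufferDataType; DefineBuffers then emits one alias per set bit.
constexpr Type Accumulate(std::initializer_list<Type> per_instruction_types) {
    Type used{};
    for (Type t : per_instruction_types) {
        used = used | t;
    }
    return used;
}

static_assert(True(Accumulate({Type::U32, Type::U8}) & Type::U8));
static_assert(!True(Accumulate({Type::U32, Type::U8}) & Type::F32));
```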
```diff
@@ -1,14 +1,11 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include <ranges>
-#include <span>
-#include <boost/container/static_vector.hpp>
 #include <fmt/format.h>
 #include <fmt/ranges.h>
 
 #include "common/assert.h"
 #include "common/config.h"
 #include "common/debug.h"
 #include "sdl_window.h"
 #include "video_core/renderer_vulkan/liverpool_to_vk.h"
```
```diff
@@ -208,7 +205,8 @@ std::string Instance::GetDriverVersionName() {
 bool Instance::CreateDevice() {
     const vk::StructureChain feature_chain =
         physical_device
-            .getFeatures2<vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceVulkan12Features,
+            .getFeatures2<vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceVulkan11Features,
+                          vk::PhysicalDeviceVulkan12Features,
                           vk::PhysicalDeviceRobustness2FeaturesEXT,
                           vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
                           vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
```
```diff
@@ -319,6 +317,7 @@ bool Instance::CreateDevice() {
 
     const auto topology_list_restart_features =
         feature_chain.get<vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>();
+    const auto vk11_features = feature_chain.get<vk::PhysicalDeviceVulkan11Features>();
     const auto vk12_features = feature_chain.get<vk::PhysicalDeviceVulkan12Features>();
     vk::StructureChain device_chain = {
         vk::DeviceCreateInfo{
```
```diff
@@ -351,12 +350,17 @@ bool Instance::CreateDevice() {
             },
         },
         vk::PhysicalDeviceVulkan11Features{
-            .shaderDrawParameters = true,
+            .storageBuffer16BitAccess = vk11_features.storageBuffer16BitAccess,
+            .uniformAndStorageBuffer16BitAccess = vk11_features.uniformAndStorageBuffer16BitAccess,
+            .shaderDrawParameters = vk11_features.shaderDrawParameters,
         },
         vk::PhysicalDeviceVulkan12Features{
             .samplerMirrorClampToEdge = vk12_features.samplerMirrorClampToEdge,
             .drawIndirectCount = vk12_features.drawIndirectCount,
+            .storageBuffer8BitAccess = vk12_features.storageBuffer8BitAccess,
+            .uniformAndStorageBuffer8BitAccess = vk12_features.uniformAndStorageBuffer8BitAccess,
             .shaderFloat16 = vk12_features.shaderFloat16,
+            .shaderInt8 = vk12_features.shaderInt8,
             .scalarBlockLayout = vk12_features.scalarBlockLayout,
             .uniformBufferStandardLayout = vk12_features.uniformBufferStandardLayout,
             .separateDepthStencilLayouts = vk12_features.separateDepthStencilLayouts,
```
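Note the pattern in the last hunk: instead of hard-coding `true`, each new feature is passed through from what the device actually reports, so unsupported hardware simply gets the feature left off. A small illustrative helper one could run after querying the chain (it uses only real Vulkan-Hpp struct members, but the helper itself is not part of the commit):

```cpp
#include <fmt/format.h>
#include <vulkan/vulkan.hpp>

// Illustrative only: warn when a device lacks the features the new
// 8/16-bit buffer aliases rely on.
inline void WarnMissingAliasFeatures(const vk::PhysicalDeviceVulkan11Features& vk11,
                                     const vk::PhysicalDeviceVulkan12Features& vk12) {
    if (!vk11.uniformAndStorageBuffer16BitAccess) {
        fmt::print("16-bit storage access unsupported; 16-bit buffer aliases unavailable\n");
    }
    if (!vk12.uniformAndStorageBuffer8BitAccess) {
        fmt::print("8-bit storage access unsupported; 8-bit buffer aliases unavailable\n");
    }
    if (!vk12.shaderInt8) {
        fmt::print("shaderInt8 unsupported\n");
    }
}
```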