mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-22 18:15:14 +00:00
Merge remote-tracking branch 'origin/main' into register-lib
This commit is contained in:
commit
ae613cfee0
2
externals/sirit
vendored
2
externals/sirit
vendored
@ -1 +1 @@
|
|||||||
Subproject commit b4eccb336f1b1169af48dac1e04015985af86e3e
|
Subproject commit 282083a595dcca86814dedab2f2b0363ef38f1ec
|
@ -526,7 +526,14 @@ s32 PS4_SYSV_ABI sceAudio3dStrError() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
s32 PS4_SYSV_ABI sceAudio3dTerminate() {
|
s32 PS4_SYSV_ABI sceAudio3dTerminate() {
|
||||||
LOG_ERROR(Lib_Audio3d, "(STUBBED) called");
|
LOG_INFO(Lib_Audio3d, "called");
|
||||||
|
if (!state) {
|
||||||
|
return ORBIS_AUDIO3D_ERROR_NOT_READY;
|
||||||
|
}
|
||||||
|
|
||||||
|
AudioOut::sceAudioOutOutput(state->audio_out_handle, nullptr);
|
||||||
|
AudioOut::sceAudioOutClose(state->audio_out_handle);
|
||||||
|
state.release();
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/debug.h"
|
#include "common/debug.h"
|
||||||
|
#include "common/elf_info.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/polyfill_thread.h"
|
#include "common/polyfill_thread.h"
|
||||||
#include "common/thread.h"
|
#include "common/thread.h"
|
||||||
@ -243,6 +244,19 @@ s32 PS4_SYSV_ABI sceKernelSetGPO() {
|
|||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s32 PS4_SYSV_ABI sceKernelGetSystemSwVersion(SwVersionStruct* ret) {
|
||||||
|
if (ret == nullptr) {
|
||||||
|
return ORBIS_OK; // but why?
|
||||||
|
}
|
||||||
|
ASSERT(ret->struct_size == 40);
|
||||||
|
u32 fake_fw = Common::ElfInfo::Instance().RawFirmwareVer();
|
||||||
|
ret->hex_representation = fake_fw;
|
||||||
|
std::snprintf(ret->text_representation, 28, "%2x.%03x.%03x", fake_fw >> 0x18,
|
||||||
|
fake_fw >> 0xc & 0xfff, fake_fw & 0xfff); // why %2x?
|
||||||
|
LOG_INFO(Lib_Kernel, "called, returned sw version: {}", ret->text_representation);
|
||||||
|
return ORBIS_OK;
|
||||||
|
}
|
||||||
|
|
||||||
void RegisterLib(Core::Loader::SymbolsResolver* sym) {
|
void RegisterLib(Core::Loader::SymbolsResolver* sym) {
|
||||||
service_thread = std::jthread{KernelServiceThread};
|
service_thread = std::jthread{KernelServiceThread};
|
||||||
|
|
||||||
@ -258,6 +272,7 @@ void RegisterLib(Core::Loader::SymbolsResolver* sym) {
|
|||||||
Libraries::Kernel::RegisterDebug(sym);
|
Libraries::Kernel::RegisterDebug(sym);
|
||||||
|
|
||||||
LIB_OBJ("f7uOxY9mM1U", "libkernel", 1, "libkernel", 1, 1, &g_stack_chk_guard);
|
LIB_OBJ("f7uOxY9mM1U", "libkernel", 1, "libkernel", 1, 1, &g_stack_chk_guard);
|
||||||
|
LIB_FUNCTION("Mv1zUObHvXI", "libkernel", 1, "libkernel", 1, 1, sceKernelGetSystemSwVersion);
|
||||||
LIB_FUNCTION("PfccT7qURYE", "libkernel", 1, "libkernel", 1, 1, kernel_ioctl);
|
LIB_FUNCTION("PfccT7qURYE", "libkernel", 1, "libkernel", 1, 1, kernel_ioctl);
|
||||||
LIB_FUNCTION("JGfTMBOdUJo", "libkernel", 1, "libkernel", 1, 1, sceKernelGetFsSandboxRandomWord);
|
LIB_FUNCTION("JGfTMBOdUJo", "libkernel", 1, "libkernel", 1, 1, sceKernelGetFsSandboxRandomWord);
|
||||||
LIB_FUNCTION("6xVpy0Fdq+I", "libkernel", 1, "libkernel", 1, 1, _sigprocmask);
|
LIB_FUNCTION("6xVpy0Fdq+I", "libkernel", 1, "libkernel", 1, 1, _sigprocmask);
|
||||||
|
@ -35,6 +35,12 @@ struct OrbisWrapperImpl<PS4_SYSV_ABI R (*)(Args...), f> {
|
|||||||
|
|
||||||
s32* PS4_SYSV_ABI __Error();
|
s32* PS4_SYSV_ABI __Error();
|
||||||
|
|
||||||
|
struct SwVersionStruct {
|
||||||
|
u64 struct_size;
|
||||||
|
char text_representation[0x1c];
|
||||||
|
u32 hex_representation;
|
||||||
|
};
|
||||||
|
|
||||||
void RegisterLib(Core::Loader::SymbolsResolver* sym);
|
void RegisterLib(Core::Loader::SymbolsResolver* sym);
|
||||||
|
|
||||||
} // namespace Libraries::Kernel
|
} // namespace Libraries::Kernel
|
||||||
|
@ -537,6 +537,7 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma
|
|||||||
vma_base_size - start_in_vma < size ? vma_base_size - start_in_vma : size;
|
vma_base_size - start_in_vma < size ? vma_base_size - start_in_vma : size;
|
||||||
const bool has_backing = type == VMAType::Direct || type == VMAType::File;
|
const bool has_backing = type == VMAType::Direct || type == VMAType::File;
|
||||||
const auto prot = vma_base.prot;
|
const auto prot = vma_base.prot;
|
||||||
|
const bool readonly_file = prot == MemoryProt::CpuRead && type == VMAType::File;
|
||||||
|
|
||||||
if (type == VMAType::Free) {
|
if (type == VMAType::Free) {
|
||||||
return adjusted_size;
|
return adjusted_size;
|
||||||
@ -554,9 +555,8 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma
|
|||||||
vma.phys_base = 0;
|
vma.phys_base = 0;
|
||||||
vma.disallow_merge = false;
|
vma.disallow_merge = false;
|
||||||
vma.name = "";
|
vma.name = "";
|
||||||
const auto post_merge_it = MergeAdjacent(vma_map, new_it);
|
MergeAdjacent(vma_map, new_it);
|
||||||
auto& post_merge_vma = post_merge_it->second;
|
|
||||||
bool readonly_file = post_merge_vma.prot == MemoryProt::CpuRead && type == VMAType::File;
|
|
||||||
if (type != VMAType::Reserved && type != VMAType::PoolReserved) {
|
if (type != VMAType::Reserved && type != VMAType::PoolReserved) {
|
||||||
// If this mapping has GPU access, unmap from GPU.
|
// If this mapping has GPU access, unmap from GPU.
|
||||||
if (IsValidGpuMapping(virtual_addr, size)) {
|
if (IsValidGpuMapping(virtual_addr, size)) {
|
||||||
|
@ -437,7 +437,7 @@ void SettingsDialog::LoadValuesFromConfig() {
|
|||||||
toml::find_or<int>(data, "Settings", "consoleLanguage", 6))) %
|
toml::find_or<int>(data, "Settings", "consoleLanguage", 6))) %
|
||||||
languageIndexes.size());
|
languageIndexes.size());
|
||||||
ui->emulatorLanguageComboBox->setCurrentIndex(
|
ui->emulatorLanguageComboBox->setCurrentIndex(
|
||||||
languages[toml::find_or<std::string>(data, "GUI", "emulatorLanguage", "en_US")]);
|
languages[m_gui_settings->GetValue(gui::gen_guiLanguage).toString().toStdString()]);
|
||||||
ui->hideCursorComboBox->setCurrentIndex(toml::find_or<int>(data, "Input", "cursorState", 1));
|
ui->hideCursorComboBox->setCurrentIndex(toml::find_or<int>(data, "Input", "cursorState", 1));
|
||||||
OnCursorStateChanged(toml::find_or<int>(data, "Input", "cursorState", 1));
|
OnCursorStateChanged(toml::find_or<int>(data, "Input", "cursorState", 1));
|
||||||
ui->idleTimeoutSpinBox->setValue(toml::find_or<int>(data, "Input", "cursorHideTimeout", 5));
|
ui->idleTimeoutSpinBox->setValue(toml::find_or<int>(data, "Input", "cursorHideTimeout", 5));
|
||||||
|
@ -293,9 +293,17 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
|
|||||||
if (stage == LogicalStage::Geometry) {
|
if (stage == LogicalStage::Geometry) {
|
||||||
ctx.AddCapability(spv::Capability::Geometry);
|
ctx.AddCapability(spv::Capability::Geometry);
|
||||||
}
|
}
|
||||||
if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) {
|
if (info.stage == Stage::Fragment) {
|
||||||
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
|
if (profile.supports_amd_shader_explicit_vertex_parameter) {
|
||||||
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
|
ctx.AddExtension("SPV_AMD_shader_explicit_vertex_parameter");
|
||||||
|
} else if (profile.supports_fragment_shader_barycentric) {
|
||||||
|
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
|
||||||
|
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
|
||||||
|
}
|
||||||
|
if (info.loads.GetAny(IR::Attribute::BaryCoordSmoothSample) ||
|
||||||
|
info.loads.GetAny(IR::Attribute::BaryCoordNoPerspSample)) {
|
||||||
|
ctx.AddCapability(spv::Capability::SampleRateShading);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
|
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
|
||||||
ctx.AddCapability(spv::Capability::Tessellation);
|
ctx.AddCapability(spv::Capability::Tessellation);
|
||||||
|
@ -45,14 +45,14 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
|
|||||||
return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
|
return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE_MSG("Vertex output {}", u32(output));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
||||||
if (IR::IsParam(attr)) {
|
if (IR::IsParam(attr)) {
|
||||||
const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
|
const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||||
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
|
if (ctx.stage == Stage::Local) {
|
||||||
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
|
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
|
||||||
return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index),
|
return ctx.OpAccessChain(component_ptr, ctx.output_attr_array, ctx.ConstU32(attr_index),
|
||||||
ctx.ConstU32(element));
|
ctx.ConstU32(element));
|
||||||
@ -88,19 +88,15 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
|||||||
case IR::Attribute::Depth:
|
case IR::Attribute::Depth:
|
||||||
return ctx.frag_depth;
|
return ctx.frag_depth;
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Write attribute {}", attr);
|
UNREACHABLE_MSG("Write attribute {}", attr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
|
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
|
||||||
if (IR::IsParam(attr)) {
|
if (IR::IsParam(attr)) {
|
||||||
if (ctx.stage == Stage::Local && ctx.runtime_info.ls_info.links_with_tcs) {
|
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||||
return {ctx.F32[1], false};
|
const auto& info{ctx.output_params.at(index)};
|
||||||
} else {
|
return {info.component_type, info.is_integer};
|
||||||
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
|
|
||||||
const auto& info{ctx.output_params.at(index)};
|
|
||||||
return {info.component_type, info.is_integer};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (IR::IsMrt(attr)) {
|
if (IR::IsMrt(attr)) {
|
||||||
const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
|
const u32 index{u32(attr) - u32(IR::Attribute::RenderTarget0)};
|
||||||
@ -115,11 +111,14 @@ std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr
|
|||||||
case IR::Attribute::Depth:
|
case IR::Attribute::Depth:
|
||||||
return {ctx.F32[1], false};
|
return {ctx.F32[1], false};
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Write attribute {}", attr);
|
UNREACHABLE_MSG("Write attribute {}", attr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
using PointerType = EmitContext::PointerType;
|
||||||
|
using PointerSize = EmitContext::PointerSize;
|
||||||
|
|
||||||
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
|
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
|
||||||
const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg);
|
const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg);
|
||||||
const u32 half = PushData::UdRegsIndex + (index >> 2);
|
const u32 half = PushData::UdRegsIndex + (index >> 2);
|
||||||
@ -131,41 +130,6 @@ Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
|
|||||||
return ud_reg;
|
return ud_reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitGetThreadBitScalarReg(EmitContext& ctx) {
|
|
||||||
UNREACHABLE_MSG("Unreachable instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSetThreadBitScalarReg(EmitContext& ctx) {
|
|
||||||
UNREACHABLE_MSG("Unreachable instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGetScalarRegister(EmitContext&) {
|
|
||||||
UNREACHABLE_MSG("Unreachable instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSetScalarRegister(EmitContext&) {
|
|
||||||
UNREACHABLE_MSG("Unreachable instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGetVectorRegister(EmitContext& ctx) {
|
|
||||||
UNREACHABLE_MSG("Unreachable instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSetVectorRegister(EmitContext& ctx) {
|
|
||||||
UNREACHABLE_MSG("Unreachable instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitSetGotoVariable(EmitContext&) {
|
|
||||||
UNREACHABLE_MSG("Unreachable instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitGetGotoVariable(EmitContext&) {
|
|
||||||
UNREACHABLE_MSG("Unreachable instruction");
|
|
||||||
}
|
|
||||||
|
|
||||||
using PointerType = EmitContext::PointerType;
|
|
||||||
using PointerSize = EmitContext::PointerSize;
|
|
||||||
|
|
||||||
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
|
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
|
||||||
const u32 flatbuf_off_dw = inst->Flags<u32>();
|
const u32 flatbuf_off_dw = inst->Flags<u32>();
|
||||||
if (!Config::directMemoryAccess()) {
|
if (!Config::directMemoryAccess()) {
|
||||||
@ -180,120 +144,76 @@ Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <PointerType type>
|
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
|
||||||
Id ReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
|
|
||||||
const auto& buffer = ctx.buffers[handle];
|
const auto& buffer = ctx.buffers[handle];
|
||||||
if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
|
if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
|
||||||
index = ctx.OpIAdd(ctx.U32[1], index, offset);
|
index = ctx.OpIAdd(ctx.U32[1], index, offset);
|
||||||
}
|
}
|
||||||
const auto [id, pointer_type] = buffer.Alias(type);
|
const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
|
||||||
const auto value_type = type == PointerType::U32 ? ctx.U32[1] : ctx.F32[1];
|
|
||||||
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
|
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
|
||||||
const Id result{ctx.OpLoad(value_type, ptr)};
|
const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
|
||||||
if (const Id size = buffer.Size(PointerSize::B32); Sirit::ValidId(size)) {
|
if (const Id size = buffer.Size(PointerSize::B32); Sirit::ValidId(size)) {
|
||||||
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, size);
|
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, size);
|
||||||
return ctx.OpSelect(value_type, in_bounds, result, ctx.u32_zero_value);
|
return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value);
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
|
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
|
||||||
return ReadConstBuffer<PointerType::U32>(ctx, handle, index);
|
|
||||||
}
|
|
||||||
|
|
||||||
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
|
|
||||||
const auto index{rate_idx == 0 ? PushData::Step0Index : PushData::Step1Index};
|
|
||||||
return ctx.OpLoad(
|
|
||||||
ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
|
|
||||||
ctx.push_data_block, ctx.ConstU32(index)));
|
|
||||||
}
|
|
||||||
|
|
||||||
static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
|
|
||||||
if (IR::IsPosition(attr)) {
|
|
||||||
ASSERT(attr == IR::Attribute::Position0);
|
|
||||||
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
|
||||||
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
|
|
||||||
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
|
||||||
return ctx.OpLoad(ctx.F32[1],
|
|
||||||
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (IR::IsParam(attr)) {
|
|
||||||
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
|
|
||||||
const auto param = ctx.input_params.at(param_id).id;
|
|
||||||
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
|
|
||||||
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
|
|
||||||
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
|
||||||
return ctx.OpLoad(ctx.F32[1],
|
|
||||||
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
|
|
||||||
}
|
|
||||||
UNREACHABLE();
|
|
||||||
}
|
|
||||||
|
|
||||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
|
|
||||||
if (ctx.info.l_stage == LogicalStage::Geometry) {
|
|
||||||
return EmitGetAttributeForGeometry(ctx, attr, comp, index);
|
|
||||||
} else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
|
|
||||||
ctx.info.l_stage == LogicalStage::TessellationEval) {
|
|
||||||
if (IR::IsTessCoord(attr)) {
|
|
||||||
const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
|
|
||||||
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
|
|
||||||
const auto pointer{
|
|
||||||
ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
|
|
||||||
return ctx.OpLoad(ctx.F32[1], pointer);
|
|
||||||
}
|
|
||||||
UNREACHABLE();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (IR::IsParam(attr)) {
|
if (IR::IsParam(attr)) {
|
||||||
const u32 param_index{u32(attr) - u32(IR::Attribute::Param0)};
|
const u32 param_index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||||
const auto& param{ctx.input_params.at(param_index)};
|
const auto& param{ctx.input_params.at(param_index)};
|
||||||
if (param.buffer_handle >= 0) {
|
const Id value = [&] {
|
||||||
const auto step_rate = EmitReadStepRate(ctx, param.id.value);
|
if (param.is_array) {
|
||||||
const auto offset = ctx.OpIAdd(
|
ASSERT(param.num_components > 1);
|
||||||
ctx.U32[1],
|
if (param.is_loaded) {
|
||||||
ctx.OpIMul(
|
return ctx.OpCompositeExtract(param.component_type, param.id_array[index],
|
||||||
ctx.U32[1],
|
comp);
|
||||||
ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
|
} else {
|
||||||
ctx.ConstU32(param.num_components)),
|
return ctx.OpLoad(param.component_type,
|
||||||
ctx.ConstU32(comp));
|
ctx.OpAccessChain(param.pointer_type, param.id,
|
||||||
return ReadConstBuffer<PointerType::F32>(ctx, param.buffer_handle, offset);
|
ctx.ConstU32(index), ctx.ConstU32(comp)));
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
Id result;
|
ASSERT(!param.is_loaded);
|
||||||
if (param.is_loaded) {
|
if (param.num_components > 1) {
|
||||||
// Attribute is either default or manually interpolated. The id points to an already
|
return ctx.OpLoad(
|
||||||
// loaded vector.
|
param.component_type,
|
||||||
result = ctx.OpCompositeExtract(param.component_type, param.id, comp);
|
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp)));
|
||||||
} else if (param.num_components > 1) {
|
} else {
|
||||||
// Attribute is a vector and we need to access a specific component.
|
return ctx.OpLoad(param.component_type, param.id);
|
||||||
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
|
}
|
||||||
result = ctx.OpLoad(param.component_type, pointer);
|
}
|
||||||
} else {
|
}();
|
||||||
// Attribute is a single float or interger, simply load it.
|
return param.is_integer ? ctx.OpBitcast(ctx.F32[1], value) : value;
|
||||||
result = ctx.OpLoad(param.component_type, param.id);
|
}
|
||||||
}
|
if (IR::IsBarycentricCoord(attr) && ctx.profile.supports_fragment_shader_barycentric) {
|
||||||
if (param.is_integer) {
|
++comp;
|
||||||
result = ctx.OpBitcast(ctx.F32[1], result);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (attr) {
|
switch (attr) {
|
||||||
case IR::Attribute::FragCoord: {
|
case IR::Attribute::Position0:
|
||||||
const Id coord = ctx.OpLoad(
|
ASSERT(ctx.l_stage == LogicalStage::Geometry);
|
||||||
ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp)));
|
return ctx.OpLoad(ctx.F32[1],
|
||||||
if (comp == 3) {
|
ctx.OpAccessChain(ctx.input_f32, ctx.gl_in, ctx.ConstU32(index),
|
||||||
return ctx.OpFDiv(ctx.F32[1], ctx.ConstF32(1.f), coord);
|
ctx.ConstU32(0U), ctx.ConstU32(comp)));
|
||||||
}
|
case IR::Attribute::FragCoord:
|
||||||
return coord;
|
return ctx.OpLoad(ctx.F32[1],
|
||||||
}
|
ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp)));
|
||||||
case IR::Attribute::TessellationEvaluationPointU:
|
case IR::Attribute::TessellationEvaluationPointU:
|
||||||
return ctx.OpLoad(ctx.F32[1],
|
return ctx.OpLoad(ctx.F32[1],
|
||||||
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
|
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
|
||||||
case IR::Attribute::TessellationEvaluationPointV:
|
case IR::Attribute::TessellationEvaluationPointV:
|
||||||
return ctx.OpLoad(ctx.F32[1],
|
return ctx.OpLoad(ctx.F32[1],
|
||||||
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
|
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
|
||||||
|
case IR::Attribute::BaryCoordSmooth:
|
||||||
|
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.bary_coord_smooth,
|
||||||
|
ctx.ConstU32(comp)));
|
||||||
|
case IR::Attribute::BaryCoordSmoothSample:
|
||||||
|
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.bary_coord_smooth_sample,
|
||||||
|
ctx.ConstU32(comp)));
|
||||||
|
case IR::Attribute::BaryCoordNoPersp:
|
||||||
|
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.bary_coord_nopersp,
|
||||||
|
ctx.ConstU32(comp)));
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Read attribute {}", attr);
|
UNREACHABLE_MSG("Read attribute {}", attr);
|
||||||
}
|
}
|
||||||
@ -305,10 +225,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||||||
return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
|
return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
|
||||||
case IR::Attribute::InstanceId:
|
case IR::Attribute::InstanceId:
|
||||||
return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
|
return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
|
||||||
case IR::Attribute::InstanceId0:
|
|
||||||
return EmitReadStepRate(ctx, 0);
|
|
||||||
case IR::Attribute::InstanceId1:
|
|
||||||
return EmitReadStepRate(ctx, 1);
|
|
||||||
case IR::Attribute::WorkgroupIndex:
|
case IR::Attribute::WorkgroupIndex:
|
||||||
return ctx.workgroup_index_id;
|
return ctx.workgroup_index_id;
|
||||||
case IR::Attribute::WorkgroupId:
|
case IR::Attribute::WorkgroupId:
|
||||||
@ -640,4 +556,36 @@ void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
|
|||||||
UNREACHABLE_MSG("SPIR-V instruction");
|
UNREACHABLE_MSG("SPIR-V instruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EmitGetThreadBitScalarReg(EmitContext& ctx) {
|
||||||
|
UNREACHABLE_MSG("Unreachable instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSetThreadBitScalarReg(EmitContext& ctx) {
|
||||||
|
UNREACHABLE_MSG("Unreachable instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGetScalarRegister(EmitContext&) {
|
||||||
|
UNREACHABLE_MSG("Unreachable instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSetScalarRegister(EmitContext&) {
|
||||||
|
UNREACHABLE_MSG("Unreachable instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGetVectorRegister(EmitContext& ctx) {
|
||||||
|
UNREACHABLE_MSG("Unreachable instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSetVectorRegister(EmitContext& ctx) {
|
||||||
|
UNREACHABLE_MSG("Unreachable instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitSetGotoVariable(EmitContext&) {
|
||||||
|
UNREACHABLE_MSG("Unreachable instruction");
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmitGetGotoVariable(EmitContext&) {
|
||||||
|
UNREACHABLE_MSG("Unreachable instruction");
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
@ -108,7 +108,7 @@ Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
|
|||||||
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||||
Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||||
Id cmp_value);
|
Id cmp_value);
|
||||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
|
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
|
||||||
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
||||||
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index);
|
Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index);
|
||||||
|
@ -9,7 +9,7 @@ namespace Shader::Backend::SPIRV {
|
|||||||
|
|
||||||
void EmitPrologue(EmitContext& ctx) {
|
void EmitPrologue(EmitContext& ctx) {
|
||||||
if (ctx.stage == Stage::Fragment) {
|
if (ctx.stage == Stage::Fragment) {
|
||||||
ctx.DefineInterpolatedAttribs();
|
ctx.DefineAmdPerVertexAttribs();
|
||||||
}
|
}
|
||||||
if (ctx.info.loads.Get(IR::Attribute::WorkgroupIndex)) {
|
if (ctx.info.loads.Get(IR::Attribute::WorkgroupIndex)) {
|
||||||
ctx.DefineWorkgroupIndex();
|
ctx.DefineWorkgroupIndex();
|
||||||
|
@ -196,14 +196,15 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
|
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
|
||||||
u32 num_components, bool output) {
|
u32 num_components, bool output,
|
||||||
|
bool loaded, bool array) {
|
||||||
switch (GetNumberClass(fmt)) {
|
switch (GetNumberClass(fmt)) {
|
||||||
case AmdGpu::NumberClass::Float:
|
case AmdGpu::NumberClass::Float:
|
||||||
return {id, output ? output_f32 : input_f32, F32[1], num_components, false};
|
return {id, output ? output_f32 : input_f32, F32[1], num_components, false, loaded, array};
|
||||||
case AmdGpu::NumberClass::Uint:
|
case AmdGpu::NumberClass::Uint:
|
||||||
return {id, output ? output_u32 : input_u32, U32[1], num_components, true};
|
return {id, output ? output_u32 : input_u32, U32[1], num_components, true, loaded, array};
|
||||||
case AmdGpu::NumberClass::Sint:
|
case AmdGpu::NumberClass::Sint:
|
||||||
return {id, output ? output_s32 : input_s32, S32[1], num_components, true};
|
return {id, output ? output_s32 : input_s32, S32[1], num_components, true, loaded, array};
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -298,33 +299,24 @@ void EmitContext::DefineBufferProperties() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitContext::DefineInterpolatedAttribs() {
|
void EmitContext::DefineAmdPerVertexAttribs() {
|
||||||
if (!profile.needs_manual_interpolation) {
|
if (!profile.supports_amd_shader_explicit_vertex_parameter) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Iterate all input attributes, load them and manually interpolate.
|
|
||||||
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
|
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
|
||||||
const auto& input = runtime_info.fs_info.inputs[i];
|
const auto& input = runtime_info.fs_info.inputs[i];
|
||||||
auto& params = input_params[i];
|
if (input.IsDefault() || info.fs_interpolation[i].primary != Qualifier::PerVertex) {
|
||||||
if (input.is_flat || params.is_loaded) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const Id p_array{OpLoad(TypeArray(F32[4], ConstU32(3U)), params.id)};
|
auto& param = input_params[i];
|
||||||
const Id p0{OpCompositeExtract(F32[4], p_array, 0U)};
|
const Id pointer = param.id;
|
||||||
const Id p1{OpCompositeExtract(F32[4], p_array, 1U)};
|
param.id_array[0] =
|
||||||
const Id p2{OpCompositeExtract(F32[4], p_array, 2U)};
|
OpInterpolateAtVertexAMD(F32[param.num_components], pointer, ConstU32(0U));
|
||||||
const Id p10{OpFSub(F32[4], p1, p0)};
|
param.id_array[1] =
|
||||||
const Id p20{OpFSub(F32[4], p2, p0)};
|
OpInterpolateAtVertexAMD(F32[param.num_components], pointer, ConstU32(1U));
|
||||||
const Id bary_coord{OpLoad(F32[3], IsLinear(info.interp_qualifiers[i])
|
param.id_array[2] =
|
||||||
? bary_coord_linear_id
|
OpInterpolateAtVertexAMD(F32[param.num_components], pointer, ConstU32(2U));
|
||||||
: bary_coord_persp_id)};
|
param.is_loaded = true;
|
||||||
const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)};
|
|
||||||
const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)};
|
|
||||||
const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)};
|
|
||||||
const Id p20_z{OpVectorTimesScalar(F32[4], p20, bary_coord_z)};
|
|
||||||
params.id = OpFAdd(F32[4], p0, OpFAdd(F32[4], p10_y, p20_z));
|
|
||||||
Name(params.id, fmt::format("fs_in_attr{}", i));
|
|
||||||
params.is_loaded = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -342,21 +334,6 @@ void EmitContext::DefineWorkgroupIndex() {
|
|||||||
Name(workgroup_index_id, "workgroup_index");
|
Name(workgroup_index_id, "workgroup_index");
|
||||||
}
|
}
|
||||||
|
|
||||||
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
|
|
||||||
switch (default_value) {
|
|
||||||
case 0:
|
|
||||||
return ctx.ConstF32(0.f, 0.f, 0.f, 0.f);
|
|
||||||
case 1:
|
|
||||||
return ctx.ConstF32(0.f, 0.f, 0.f, 1.f);
|
|
||||||
case 2:
|
|
||||||
return ctx.ConstF32(1.f, 1.f, 1.f, 0.f);
|
|
||||||
case 3:
|
|
||||||
return ctx.ConstF32(1.f, 1.f, 1.f, 1.f);
|
|
||||||
default:
|
|
||||||
UNREACHABLE();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void EmitContext::DefineInputs() {
|
void EmitContext::DefineInputs() {
|
||||||
if (info.uses_lane_id) {
|
if (info.uses_lane_id) {
|
||||||
subgroup_local_invocation_id = DefineVariable(
|
subgroup_local_invocation_id = DefineVariable(
|
||||||
@ -377,35 +354,13 @@ void EmitContext::DefineInputs() {
|
|||||||
ASSERT(attrib.semantic < IR::NumParams);
|
ASSERT(attrib.semantic < IR::NumParams);
|
||||||
const auto sharp = attrib.GetSharp(info);
|
const auto sharp = attrib.GetSharp(info);
|
||||||
const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
|
const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
|
||||||
if (attrib.UsesStepRates()) {
|
Id id{DefineInput(type, attrib.semantic)};
|
||||||
const u32 rate_idx =
|
if (attrib.GetStepRate() != Gcn::VertexAttribute::InstanceIdType::None) {
|
||||||
attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 0
|
Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
|
||||||
: 1;
|
|
||||||
const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt());
|
|
||||||
const auto buffer =
|
|
||||||
std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) {
|
|
||||||
return buffer.instance_attrib == attrib.semantic;
|
|
||||||
});
|
|
||||||
// Note that we pass index rather than Id
|
|
||||||
input_params[attrib.semantic] = SpirvAttribute{
|
|
||||||
.id = {rate_idx},
|
|
||||||
.pointer_type = input_u32,
|
|
||||||
.component_type = U32[1],
|
|
||||||
.num_components = std::min<u16>(attrib.num_elements, num_components),
|
|
||||||
.is_integer = true,
|
|
||||||
.is_loaded = false,
|
|
||||||
.buffer_handle = int(buffer - info.buffers.begin()),
|
|
||||||
};
|
|
||||||
} else {
|
} else {
|
||||||
Id id{DefineInput(type, attrib.semantic)};
|
Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
|
||||||
if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) {
|
|
||||||
Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
|
|
||||||
} else {
|
|
||||||
Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
|
|
||||||
}
|
|
||||||
input_params[attrib.semantic] =
|
|
||||||
GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
|
|
||||||
}
|
}
|
||||||
|
input_params[attrib.semantic] = GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -420,49 +375,71 @@ void EmitContext::DefineInputs() {
|
|||||||
front_facing =
|
front_facing =
|
||||||
DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||||
}
|
}
|
||||||
if (profile.needs_manual_interpolation) {
|
if (info.loads.GetAny(IR::Attribute::BaryCoordSmooth)) {
|
||||||
if (info.has_perspective_interp) {
|
if (profile.supports_amd_shader_explicit_vertex_parameter) {
|
||||||
bary_coord_persp_id =
|
bary_coord_smooth = DefineVariable(F32[2], spv::BuiltIn::BaryCoordSmoothAMD,
|
||||||
|
spv::StorageClass::Input);
|
||||||
|
} else if (profile.supports_fragment_shader_barycentric) {
|
||||||
|
bary_coord_smooth =
|
||||||
DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
|
DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
|
||||||
|
} else {
|
||||||
|
bary_coord_smooth = ConstF32(0.f, 0.f);
|
||||||
}
|
}
|
||||||
if (info.has_linear_interp) {
|
}
|
||||||
bary_coord_linear_id = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR,
|
if (info.loads.GetAny(IR::Attribute::BaryCoordSmoothSample)) {
|
||||||
spv::StorageClass::Input);
|
if (profile.supports_amd_shader_explicit_vertex_parameter) {
|
||||||
|
bary_coord_smooth_sample = DefineVariable(
|
||||||
|
F32[2], spv::BuiltIn::BaryCoordSmoothSampleAMD, spv::StorageClass::Input);
|
||||||
|
} else if (profile.supports_fragment_shader_barycentric) {
|
||||||
|
bary_coord_smooth_sample =
|
||||||
|
DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
|
||||||
|
// Decorate(bary_coord_smooth_sample, spv::Decoration::Sample);
|
||||||
|
} else {
|
||||||
|
bary_coord_smooth_sample = ConstF32(0.f, 0.f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (info.loads.GetAny(IR::Attribute::BaryCoordNoPersp)) {
|
||||||
|
if (profile.supports_amd_shader_explicit_vertex_parameter) {
|
||||||
|
bary_coord_nopersp = DefineVariable(F32[2], spv::BuiltIn::BaryCoordNoPerspAMD,
|
||||||
|
spv::StorageClass::Input);
|
||||||
|
} else if (profile.supports_fragment_shader_barycentric) {
|
||||||
|
bary_coord_nopersp = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR,
|
||||||
|
spv::StorageClass::Input);
|
||||||
|
} else {
|
||||||
|
bary_coord_nopersp = ConstF32(0.f, 0.f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
|
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
|
||||||
const auto& input = runtime_info.fs_info.inputs[i];
|
const auto& input = runtime_info.fs_info.inputs[i];
|
||||||
if (input.IsDefault()) {
|
if (input.IsDefault()) {
|
||||||
input_params[i] = {
|
|
||||||
.id = MakeDefaultValue(*this, input.default_value),
|
|
||||||
.pointer_type = input_f32,
|
|
||||||
.component_type = F32[1],
|
|
||||||
.num_components = 4,
|
|
||||||
.is_integer = false,
|
|
||||||
.is_loaded = true,
|
|
||||||
};
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const IR::Attribute param{IR::Attribute::Param0 + i};
|
const IR::Attribute param = IR::Attribute::Param0 + i;
|
||||||
const u32 num_components = info.loads.NumComponents(param);
|
const u32 num_components = info.loads.NumComponents(param);
|
||||||
const Id type{F32[num_components]};
|
const auto [primary, auxiliary] = info.fs_interpolation[i];
|
||||||
Id attr_id{};
|
const Id type = F32[num_components];
|
||||||
if (profile.needs_manual_interpolation && !input.is_flat) {
|
const Id attr_id = [&] {
|
||||||
attr_id = DefineInput(TypeArray(type, ConstU32(3U)), input.param_index);
|
if (primary == Qualifier::PerVertex &&
|
||||||
Decorate(attr_id, spv::Decoration::PerVertexKHR);
|
profile.supports_fragment_shader_barycentric) {
|
||||||
Name(attr_id, fmt::format("fs_in_attr{}_p", i));
|
return Name(DefineInput(TypeArray(type, ConstU32(3U)), input.param_index),
|
||||||
} else {
|
fmt::format("fs_in_attr{}_p", i));
|
||||||
attr_id = DefineInput(type, input.param_index);
|
|
||||||
Name(attr_id, fmt::format("fs_in_attr{}", i));
|
|
||||||
|
|
||||||
if (input.is_flat) {
|
|
||||||
Decorate(attr_id, spv::Decoration::Flat);
|
|
||||||
} else if (IsLinear(info.interp_qualifiers[i])) {
|
|
||||||
Decorate(attr_id, spv::Decoration::NoPerspective);
|
|
||||||
}
|
}
|
||||||
|
return Name(DefineInput(type, input.param_index), fmt::format("fs_in_attr{}", i));
|
||||||
|
}();
|
||||||
|
if (primary == Qualifier::PerVertex) {
|
||||||
|
Decorate(attr_id, profile.supports_amd_shader_explicit_vertex_parameter
|
||||||
|
? spv::Decoration::ExplicitInterpAMD
|
||||||
|
: spv::Decoration::PerVertexKHR);
|
||||||
|
} else if (primary != Qualifier::Smooth) {
|
||||||
|
Decorate(attr_id, primary == Qualifier::Flat ? spv::Decoration::Flat
|
||||||
|
: spv::Decoration::NoPerspective);
|
||||||
}
|
}
|
||||||
input_params[i] =
|
if (auxiliary != Qualifier::None) {
|
||||||
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
|
Decorate(attr_id, auxiliary == Qualifier::Centroid ? spv::Decoration::Centroid
|
||||||
|
: spv::Decoration::Sample);
|
||||||
|
}
|
||||||
|
input_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components,
|
||||||
|
false, false, primary == Qualifier::PerVertex);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case LogicalStage::Compute:
|
case LogicalStage::Compute:
|
||||||
@ -483,17 +460,16 @@ void EmitContext::DefineInputs() {
|
|||||||
case LogicalStage::Geometry: {
|
case LogicalStage::Geometry: {
|
||||||
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
||||||
const auto gl_per_vertex =
|
const auto gl_per_vertex =
|
||||||
Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))),
|
Name(TypeStruct(F32[4], F32[1], TypeArray(F32[1], ConstU32(1u))), "gl_PerVertex");
|
||||||
"gl_PerVertex");
|
|
||||||
MemberName(gl_per_vertex, 0, "gl_Position");
|
MemberName(gl_per_vertex, 0, "gl_Position");
|
||||||
MemberName(gl_per_vertex, 1, "gl_PointSize");
|
MemberName(gl_per_vertex, 1, "gl_PointSize");
|
||||||
MemberName(gl_per_vertex, 2, "gl_ClipDistance");
|
MemberName(gl_per_vertex, 2, "gl_ClipDistance");
|
||||||
MemberDecorate(gl_per_vertex, 0, spv::Decoration::BuiltIn,
|
MemberDecorate(gl_per_vertex, 0, spv::Decoration::BuiltIn,
|
||||||
static_cast<std::uint32_t>(spv::BuiltIn::Position));
|
static_cast<u32>(spv::BuiltIn::Position));
|
||||||
MemberDecorate(gl_per_vertex, 1, spv::Decoration::BuiltIn,
|
MemberDecorate(gl_per_vertex, 1, spv::Decoration::BuiltIn,
|
||||||
static_cast<std::uint32_t>(spv::BuiltIn::PointSize));
|
static_cast<u32>(spv::BuiltIn::PointSize));
|
||||||
MemberDecorate(gl_per_vertex, 2, spv::Decoration::BuiltIn,
|
MemberDecorate(gl_per_vertex, 2, spv::Decoration::BuiltIn,
|
||||||
static_cast<std::uint32_t>(spv::BuiltIn::ClipDistance));
|
static_cast<u32>(spv::BuiltIn::ClipDistance));
|
||||||
Decorate(gl_per_vertex, spv::Decoration::Block);
|
Decorate(gl_per_vertex, spv::Decoration::Block);
|
||||||
const auto num_verts_in = NumVertices(runtime_info.gs_info.in_primitive);
|
const auto num_verts_in = NumVertices(runtime_info.gs_info.in_primitive);
|
||||||
const auto vertices_in = TypeArray(gl_per_vertex, ConstU32(num_verts_in));
|
const auto vertices_in = TypeArray(gl_per_vertex, ConstU32(num_verts_in));
|
||||||
@ -505,7 +481,8 @@ void EmitContext::DefineInputs() {
|
|||||||
const Id type{TypeArray(F32[4], ConstU32(num_verts_in))};
|
const Id type{TypeArray(F32[4], ConstU32(num_verts_in))};
|
||||||
const Id id{DefineInput(type, param_id)};
|
const Id id{DefineInput(type, param_id)};
|
||||||
Name(id, fmt::format("gs_in_attr{}", param_id));
|
Name(id, fmt::format("gs_in_attr{}", param_id));
|
||||||
input_params[param_id] = {id, input_f32, F32[1], 4};
|
input_params[param_id] =
|
||||||
|
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, 4, false, false, true);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -573,7 +550,7 @@ void EmitContext::DefineOutputs() {
|
|||||||
cull_distances =
|
cull_distances =
|
||||||
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
|
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
|
||||||
}
|
}
|
||||||
if (stage == Shader::Stage::Local && runtime_info.ls_info.links_with_tcs) {
|
if (stage == Stage::Local) {
|
||||||
const u32 num_attrs = Common::AlignUp(runtime_info.ls_info.ls_stride, 16) >> 4;
|
const u32 num_attrs = Common::AlignUp(runtime_info.ls_info.ls_stride, 16) >> 4;
|
||||||
if (num_attrs > 0) {
|
if (num_attrs > 0) {
|
||||||
const Id type{TypeArray(F32[4], ConstU32(num_attrs))};
|
const Id type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||||
@ -687,7 +664,7 @@ void EmitContext::DefineOutputs() {
|
|||||||
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
|
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
|
||||||
const Id id{DefineOutput(F32[4], attr_id)};
|
const Id id{DefineOutput(F32[4], attr_id)};
|
||||||
Name(id, fmt::format("out_attr{}", attr_id));
|
Name(id, fmt::format("out_attr{}", attr_id));
|
||||||
output_params[attr_id] = {id, output_f32, F32[1], 4u};
|
output_params[attr_id] = GetAttributeInfo(AmdGpu::NumberFormat::Float, id, 4, true);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -700,12 +677,10 @@ void EmitContext::DefineOutputs() {
|
|||||||
|
|
||||||
void EmitContext::DefinePushDataBlock() {
|
void EmitContext::DefinePushDataBlock() {
|
||||||
// Create push constants block for instance steps rates
|
// Create push constants block for instance steps rates
|
||||||
const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4],
|
const Id struct_type{Name(TypeStruct(F32[1], F32[1], F32[1], F32[1], U32[4], U32[4], U32[4],
|
||||||
U32[4], U32[4], U32[4], U32[4], U32[4], U32[2]),
|
U32[4], U32[4], U32[4], U32[2]),
|
||||||
"AuxData")};
|
"AuxData")};
|
||||||
Decorate(struct_type, spv::Decoration::Block);
|
Decorate(struct_type, spv::Decoration::Block);
|
||||||
MemberName(struct_type, PushData::Step0Index, "sr0");
|
|
||||||
MemberName(struct_type, PushData::Step1Index, "sr1");
|
|
||||||
MemberName(struct_type, PushData::XOffsetIndex, "xoffset");
|
MemberName(struct_type, PushData::XOffsetIndex, "xoffset");
|
||||||
MemberName(struct_type, PushData::YOffsetIndex, "yoffset");
|
MemberName(struct_type, PushData::YOffsetIndex, "yoffset");
|
||||||
MemberName(struct_type, PushData::XScaleIndex, "xscale");
|
MemberName(struct_type, PushData::XScaleIndex, "xscale");
|
||||||
@ -717,19 +692,17 @@ void EmitContext::DefinePushDataBlock() {
|
|||||||
MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0");
|
MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0");
|
||||||
MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1");
|
MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1");
|
||||||
MemberName(struct_type, PushData::BufOffsetIndex + 2, "buf_offsets2");
|
MemberName(struct_type, PushData::BufOffsetIndex + 2, "buf_offsets2");
|
||||||
MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U);
|
MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 0U);
|
||||||
MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U);
|
MemberDecorate(struct_type, PushData::YOffsetIndex, spv::Decoration::Offset, 4U);
|
||||||
MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U);
|
MemberDecorate(struct_type, PushData::XScaleIndex, spv::Decoration::Offset, 8U);
|
||||||
MemberDecorate(struct_type, PushData::YOffsetIndex, spv::Decoration::Offset, 12U);
|
MemberDecorate(struct_type, PushData::YScaleIndex, spv::Decoration::Offset, 12U);
|
||||||
MemberDecorate(struct_type, PushData::XScaleIndex, spv::Decoration::Offset, 16U);
|
MemberDecorate(struct_type, PushData::UdRegsIndex + 0, spv::Decoration::Offset, 16U);
|
||||||
MemberDecorate(struct_type, PushData::YScaleIndex, spv::Decoration::Offset, 20U);
|
MemberDecorate(struct_type, PushData::UdRegsIndex + 1, spv::Decoration::Offset, 32U);
|
||||||
MemberDecorate(struct_type, PushData::UdRegsIndex + 0, spv::Decoration::Offset, 24U);
|
MemberDecorate(struct_type, PushData::UdRegsIndex + 2, spv::Decoration::Offset, 48U);
|
||||||
MemberDecorate(struct_type, PushData::UdRegsIndex + 1, spv::Decoration::Offset, 40U);
|
MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 64U);
|
||||||
MemberDecorate(struct_type, PushData::UdRegsIndex + 2, spv::Decoration::Offset, 56U);
|
MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 80U);
|
||||||
MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U);
|
MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 96U);
|
||||||
MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U);
|
MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 112U);
|
||||||
MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U);
|
|
||||||
MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 120U);
|
|
||||||
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
||||||
Name(push_data_block, "push_data");
|
Name(push_data_block, "push_data");
|
||||||
interfaces.push_back(push_data_block);
|
interfaces.push_back(push_data_block);
|
||||||
@ -763,19 +736,19 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
|
|||||||
Decorate(id, spv::Decoration::NonWritable);
|
Decorate(id, spv::Decoration::NonWritable);
|
||||||
}
|
}
|
||||||
switch (buffer_type) {
|
switch (buffer_type) {
|
||||||
case Shader::BufferType::GdsBuffer:
|
case BufferType::GdsBuffer:
|
||||||
Name(id, "gds_buffer");
|
Name(id, "gds_buffer");
|
||||||
break;
|
break;
|
||||||
case Shader::BufferType::Flatbuf:
|
case BufferType::Flatbuf:
|
||||||
Name(id, "srt_flatbuf");
|
Name(id, "srt_flatbuf");
|
||||||
break;
|
break;
|
||||||
case Shader::BufferType::BdaPagetable:
|
case BufferType::BdaPagetable:
|
||||||
Name(id, "bda_pagetable");
|
Name(id, "bda_pagetable");
|
||||||
break;
|
break;
|
||||||
case Shader::BufferType::FaultBuffer:
|
case BufferType::FaultBuffer:
|
||||||
Name(id, "fault_buffer");
|
Name(id, "fault_buffer");
|
||||||
break;
|
break;
|
||||||
case Shader::BufferType::SharedMemory:
|
case BufferType::SharedMemory:
|
||||||
Name(id, "ssbo_shmem");
|
Name(id, "ssbo_shmem");
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -45,7 +45,7 @@ public:
|
|||||||
Id Def(const IR::Value& value);
|
Id Def(const IR::Value& value);
|
||||||
|
|
||||||
void DefineBufferProperties();
|
void DefineBufferProperties();
|
||||||
void DefineInterpolatedAttribs();
|
void DefineAmdPerVertexAttribs();
|
||||||
void DefineWorkgroupIndex();
|
void DefineWorkgroupIndex();
|
||||||
|
|
||||||
[[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt,
|
[[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt,
|
||||||
@ -279,8 +279,9 @@ public:
|
|||||||
Id shared_memory_u32_type{};
|
Id shared_memory_u32_type{};
|
||||||
Id shared_memory_u64_type{};
|
Id shared_memory_u64_type{};
|
||||||
|
|
||||||
Id bary_coord_persp_id{};
|
Id bary_coord_smooth{};
|
||||||
Id bary_coord_linear_id{};
|
Id bary_coord_smooth_sample{};
|
||||||
|
Id bary_coord_nopersp{};
|
||||||
|
|
||||||
struct TextureDefinition {
|
struct TextureDefinition {
|
||||||
const VectorIds* data_types;
|
const VectorIds* data_types;
|
||||||
@ -355,13 +356,16 @@ public:
|
|||||||
Id sampler_pointer_type{};
|
Id sampler_pointer_type{};
|
||||||
|
|
||||||
struct SpirvAttribute {
|
struct SpirvAttribute {
|
||||||
Id id;
|
union {
|
||||||
|
Id id;
|
||||||
|
std::array<Id, 3> id_array;
|
||||||
|
};
|
||||||
Id pointer_type;
|
Id pointer_type;
|
||||||
Id component_type;
|
Id component_type;
|
||||||
u32 num_components;
|
u32 num_components;
|
||||||
bool is_integer{};
|
bool is_integer{};
|
||||||
bool is_loaded{};
|
bool is_loaded{};
|
||||||
s32 buffer_handle{-1};
|
bool is_array{};
|
||||||
};
|
};
|
||||||
Id input_attr_array;
|
Id input_attr_array;
|
||||||
Id output_attr_array;
|
Id output_attr_array;
|
||||||
@ -391,7 +395,7 @@ private:
|
|||||||
void DefineFunctions();
|
void DefineFunctions();
|
||||||
|
|
||||||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components,
|
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components,
|
||||||
bool output);
|
bool output, bool loaded = false, bool array = false);
|
||||||
|
|
||||||
BufferSpv DefineBuffer(bool is_storage, bool is_written, u32 elem_shift, BufferType buffer_type,
|
BufferSpv DefineBuffer(bool is_storage, bool is_written, u32 elem_shift, BufferType buffer_type,
|
||||||
Id data_type);
|
Id data_type);
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <ranges>
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "shader_recompiler/info.h"
|
#include "shader_recompiler/info.h"
|
||||||
@ -29,11 +28,6 @@ struct VertexAttribute {
|
|||||||
return static_cast<InstanceIdType>(instance_data);
|
return static_cast<InstanceIdType>(instance_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] bool UsesStepRates() const {
|
|
||||||
const auto step_rate = GetStepRate();
|
|
||||||
return step_rate == OverStepRate0 || step_rate == OverStepRate1;
|
|
||||||
}
|
|
||||||
|
|
||||||
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
|
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
|
||||||
return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||||
}
|
}
|
||||||
@ -52,12 +46,6 @@ struct FetchShaderData {
|
|||||||
s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR
|
s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR
|
||||||
s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
|
s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
|
||||||
|
|
||||||
[[nodiscard]] bool UsesStepRates() const {
|
|
||||||
return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) {
|
|
||||||
return attribute.UsesStepRates();
|
|
||||||
}) != attributes.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator==(const FetchShaderData& other) const {
|
bool operator==(const FetchShaderData& other) const {
|
||||||
return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
|
return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
|
||||||
instance_offset_sgpr == other.instance_offset_sgpr;
|
instance_offset_sgpr == other.instance_offset_sgpr;
|
||||||
|
@ -3,8 +3,6 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <limits>
|
|
||||||
#include "common/bit_field.h"
|
|
||||||
#include "shader_recompiler/frontend/opcodes.h"
|
#include "shader_recompiler/frontend/opcodes.h"
|
||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <magic_enum/magic_enum.hpp>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "shader_recompiler/frontend/translate/translate.h"
|
#include "shader_recompiler/frontend/translate/translate.h"
|
||||||
|
|
||||||
@ -680,9 +681,18 @@ void Translator::S_FF1_I32_B32(const GcnInst& inst) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_FF1_I32_B64(const GcnInst& inst) {
|
void Translator::S_FF1_I32_B64(const GcnInst& inst) {
|
||||||
ASSERT(inst.src[0].field == OperandField::ScalarGPR);
|
const auto src = [&] {
|
||||||
const IR::U32 result{
|
switch (inst.src[0].field) {
|
||||||
ir.BallotFindLsb(ir.Ballot(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))))};
|
case OperandField::ScalarGPR:
|
||||||
|
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
|
||||||
|
case OperandField::VccLo:
|
||||||
|
return ir.GetVcc();
|
||||||
|
default:
|
||||||
|
UNREACHABLE_MSG("unhandled operand type {}", magic_enum::enum_name(inst.src[0].field));
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
const IR::U32 result{ir.BallotFindLsb(ir.Ballot(src))};
|
||||||
|
|
||||||
SetDst(inst.dst[0], result);
|
SetDst(inst.dst[0], result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,50 +21,39 @@
|
|||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
|
|
||||||
Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
|
static IR::VectorReg IterateBarycentrics(const RuntimeInfo& runtime_info, auto&& set_attribute) {
|
||||||
: info{info_}, runtime_info{runtime_info_}, profile{profile_},
|
if (runtime_info.stage != Stage::Fragment) {
|
||||||
next_vgpr_num{runtime_info.num_allocated_vgprs} {
|
return IR::VectorReg::V0;
|
||||||
if (info.l_stage == LogicalStage::Fragment) {
|
|
||||||
dst_frag_vreg = GatherInterpQualifiers();
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
IR::VectorReg Translator::GatherInterpQualifiers() {
|
|
||||||
u32 dst_vreg{};
|
u32 dst_vreg{};
|
||||||
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
|
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
|
||||||
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // I
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothSample, 0); // I
|
||||||
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // J
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothSample, 1); // J
|
||||||
info.has_perspective_interp = true;
|
|
||||||
}
|
}
|
||||||
if (runtime_info.fs_info.addr_flags.persp_center_ena) {
|
if (runtime_info.fs_info.addr_flags.persp_center_ena) {
|
||||||
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // I
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmooth, 0); // I
|
||||||
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // J
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmooth, 1); // J
|
||||||
info.has_perspective_interp = true;
|
|
||||||
}
|
}
|
||||||
if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
|
if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
|
||||||
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // I
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothCentroid, 0); // I
|
||||||
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // J
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothCentroid, 1); // J
|
||||||
info.has_perspective_interp = true;
|
|
||||||
}
|
}
|
||||||
if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
|
if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
|
||||||
++dst_vreg; // I/W
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 0); // I/W
|
||||||
++dst_vreg; // J/W
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 1); // J/W
|
||||||
++dst_vreg; // 1/W
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 2); // 1/W
|
||||||
}
|
}
|
||||||
if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
|
if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
|
||||||
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // I
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspSample, 0); // I
|
||||||
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // J
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspSample, 1); // J
|
||||||
info.has_linear_interp = true;
|
|
||||||
}
|
}
|
||||||
if (runtime_info.fs_info.addr_flags.linear_center_ena) {
|
if (runtime_info.fs_info.addr_flags.linear_center_ena) {
|
||||||
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // I
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPersp, 0); // I
|
||||||
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // J
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPersp, 1); // J
|
||||||
info.has_linear_interp = true;
|
|
||||||
}
|
}
|
||||||
if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
|
if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
|
||||||
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // I
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspCentroid, 0); // I
|
||||||
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // J
|
set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspCentroid, 1); // J
|
||||||
info.has_linear_interp = true;
|
|
||||||
}
|
}
|
||||||
if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
|
if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
|
||||||
++dst_vreg;
|
++dst_vreg;
|
||||||
@ -72,6 +61,14 @@ IR::VectorReg Translator::GatherInterpQualifiers() {
|
|||||||
return IR::VectorReg(dst_vreg);
|
return IR::VectorReg(dst_vreg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
|
||||||
|
: info{info_}, runtime_info{runtime_info_}, profile{profile_},
|
||||||
|
next_vgpr_num{runtime_info.num_allocated_vgprs} {
|
||||||
|
IterateBarycentrics(runtime_info, [this](u32 vreg, IR::Attribute attrib, u32) {
|
||||||
|
vgpr_to_interp[vreg] = attrib;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::EmitPrologue(IR::Block* first_block) {
|
void Translator::EmitPrologue(IR::Block* first_block) {
|
||||||
ir = IR::IREmitter(*first_block, first_block->begin());
|
ir = IR::IREmitter(*first_block, first_block->begin());
|
||||||
|
|
||||||
@ -90,21 +87,47 @@ void Translator::EmitPrologue(IR::Block* first_block) {
|
|||||||
case LogicalStage::Vertex:
|
case LogicalStage::Vertex:
|
||||||
// v0: vertex ID, always present
|
// v0: vertex ID, always present
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
||||||
// v1: instance ID, step rate 0
|
if (info.stage == Stage::Local) {
|
||||||
if (runtime_info.num_input_vgprs > 0) {
|
// v1: rel patch ID
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0));
|
if (runtime_info.num_input_vgprs > 0) {
|
||||||
}
|
ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
|
||||||
// v2: instance ID, step rate 1
|
}
|
||||||
if (runtime_info.num_input_vgprs > 1) {
|
// v2: instance ID
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1));
|
if (runtime_info.num_input_vgprs > 1) {
|
||||||
}
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
|
||||||
// v3: instance ID, plain
|
}
|
||||||
if (runtime_info.num_input_vgprs > 2) {
|
} else {
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
|
// v1: instance ID, step rate 0
|
||||||
|
if (runtime_info.num_input_vgprs > 0) {
|
||||||
|
if (runtime_info.vs_info.step_rate_0 != 0) {
|
||||||
|
ir.SetVectorReg(dst_vreg++,
|
||||||
|
ir.IDiv(ir.GetAttributeU32(IR::Attribute::InstanceId),
|
||||||
|
ir.Imm32(runtime_info.vs_info.step_rate_0)));
|
||||||
|
} else {
|
||||||
|
ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// v2: instance ID, step rate 1
|
||||||
|
if (runtime_info.num_input_vgprs > 1) {
|
||||||
|
if (runtime_info.vs_info.step_rate_1 != 0) {
|
||||||
|
ir.SetVectorReg(dst_vreg++,
|
||||||
|
ir.IDiv(ir.GetAttributeU32(IR::Attribute::InstanceId),
|
||||||
|
ir.Imm32(runtime_info.vs_info.step_rate_1)));
|
||||||
|
} else {
|
||||||
|
ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// v3: instance ID, plain
|
||||||
|
if (runtime_info.num_input_vgprs > 2) {
|
||||||
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case LogicalStage::Fragment:
|
case LogicalStage::Fragment:
|
||||||
dst_vreg = dst_frag_vreg;
|
dst_vreg =
|
||||||
|
IterateBarycentrics(runtime_info, [this](u32 vreg, IR::Attribute attrib, u32 comp) {
|
||||||
|
ir.SetVectorReg(IR::VectorReg(vreg), ir.GetAttribute(attrib, comp));
|
||||||
|
});
|
||||||
if (runtime_info.fs_info.addr_flags.pos_x_float_ena) {
|
if (runtime_info.fs_info.addr_flags.pos_x_float_ena) {
|
||||||
if (runtime_info.fs_info.en_flags.pos_x_float_ena) {
|
if (runtime_info.fs_info.en_flags.pos_x_float_ena) {
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0));
|
||||||
@ -128,7 +151,8 @@ void Translator::EmitPrologue(IR::Block* first_block) {
|
|||||||
}
|
}
|
||||||
if (runtime_info.fs_info.addr_flags.pos_w_float_ena) {
|
if (runtime_info.fs_info.addr_flags.pos_w_float_ena) {
|
||||||
if (runtime_info.fs_info.en_flags.pos_w_float_ena) {
|
if (runtime_info.fs_info.en_flags.pos_w_float_ena) {
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 3));
|
ir.SetVectorReg(dst_vreg++,
|
||||||
|
ir.FPRecip(ir.GetAttribute(IR::Attribute::FragCoord, 3)));
|
||||||
} else {
|
} else {
|
||||||
ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f));
|
ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f));
|
||||||
}
|
}
|
||||||
@ -183,10 +207,8 @@ void Translator::EmitPrologue(IR::Block* first_block) {
|
|||||||
switch (runtime_info.gs_info.out_primitive[0]) {
|
switch (runtime_info.gs_info.out_primitive[0]) {
|
||||||
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
|
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
|
||||||
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
|
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
|
||||||
[[fallthrough]];
|
|
||||||
case AmdGpu::GsOutputPrimitiveType::LineStrip:
|
case AmdGpu::GsOutputPrimitiveType::LineStrip:
|
||||||
ir.SetVectorReg(IR::VectorReg::V1, ir.Imm32(1u)); // vertex 1
|
ir.SetVectorReg(IR::VectorReg::V1, ir.Imm32(1u)); // vertex 1
|
||||||
[[fallthrough]];
|
|
||||||
default:
|
default:
|
||||||
ir.SetVectorReg(IR::VectorReg::V0, ir.Imm32(0u)); // vertex 0
|
ir.SetVectorReg(IR::VectorReg::V0, ir.Imm32(0u)); // vertex 0
|
||||||
break;
|
break;
|
||||||
@ -481,11 +503,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Translator::EmitFetch(const GcnInst& inst) {
|
void Translator::EmitFetch(const GcnInst& inst) {
|
||||||
// Read the pointer to the fetch shader assembly.
|
|
||||||
const auto code_sgpr_base = inst.src[0].code;
|
const auto code_sgpr_base = inst.src[0].code;
|
||||||
|
|
||||||
|
// The fetch shader must be inlined to access as regular buffers, so that
|
||||||
|
// bounds checks can be emitted to emulate robust buffer access.
|
||||||
if (!profile.supports_robust_buffer_access) {
|
if (!profile.supports_robust_buffer_access) {
|
||||||
// The fetch shader must be inlined to access as regular buffers, so that
|
|
||||||
// bounds checks can be emitted to emulate robust buffer access.
|
|
||||||
const auto* code = GetFetchShaderCode(info, code_sgpr_base);
|
const auto* code = GetFetchShaderCode(info, code_sgpr_base);
|
||||||
GcnCodeSlice slice(code, code + std::numeric_limits<u32>::max());
|
GcnCodeSlice slice(code, code + std::numeric_limits<u32>::max());
|
||||||
GcnDecodeContext decoder;
|
GcnDecodeContext decoder;
|
||||||
@ -535,16 +557,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||||||
for (u32 i = 0; i < 4; i++) {
|
for (u32 i = 0; i < 4; i++) {
|
||||||
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)});
|
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)});
|
||||||
}
|
}
|
||||||
|
|
||||||
// In case of programmable step rates we need to fallback to instance data pulling in
|
|
||||||
// shader, so VBs should be bound as regular data buffers
|
|
||||||
if (attrib.UsesStepRates()) {
|
|
||||||
info.buffers.push_back({
|
|
||||||
.sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
|
|
||||||
.used_types = IR::Type::F32,
|
|
||||||
.instance_attrib = attrib.semantic,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -265,6 +265,7 @@ public:
|
|||||||
|
|
||||||
// Vector interpolation
|
// Vector interpolation
|
||||||
// VINTRP
|
// VINTRP
|
||||||
|
void V_INTERP_P1_F32(const GcnInst& inst);
|
||||||
void V_INTERP_P2_F32(const GcnInst& inst);
|
void V_INTERP_P2_F32(const GcnInst& inst);
|
||||||
void V_INTERP_MOV_F32(const GcnInst& inst);
|
void V_INTERP_MOV_F32(const GcnInst& inst);
|
||||||
|
|
||||||
@ -323,7 +324,6 @@ private:
|
|||||||
void LogMissingOpcode(const GcnInst& inst);
|
void LogMissingOpcode(const GcnInst& inst);
|
||||||
|
|
||||||
IR::VectorReg GetScratchVgpr(u32 offset);
|
IR::VectorReg GetScratchVgpr(u32 offset);
|
||||||
IR::VectorReg GatherInterpQualifiers();
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
IR::IREmitter ir;
|
IR::IREmitter ir;
|
||||||
@ -332,8 +332,7 @@ private:
|
|||||||
const Profile& profile;
|
const Profile& profile;
|
||||||
u32 next_vgpr_num;
|
u32 next_vgpr_num;
|
||||||
std::unordered_map<u32, IR::VectorReg> vgpr_map;
|
std::unordered_map<u32, IR::VectorReg> vgpr_map;
|
||||||
std::array<IR::Interpolation, MaxInterpVgpr> vgpr_to_interp{};
|
std::array<IR::Attribute, MaxInterpVgpr> vgpr_to_interp{};
|
||||||
IR::VectorReg dst_frag_vreg{};
|
|
||||||
bool opcode_missing = false;
|
bool opcode_missing = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -5,11 +5,32 @@
|
|||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
|
|
||||||
|
using Interpolation = Info::Interpolation;
|
||||||
|
|
||||||
|
static Interpolation GetInterpolation(IR::Attribute attribute) {
|
||||||
|
switch (attribute) {
|
||||||
|
case IR::Attribute::BaryCoordNoPersp:
|
||||||
|
return {Qualifier::NoPerspective, Qualifier::None};
|
||||||
|
case IR::Attribute::BaryCoordNoPerspCentroid:
|
||||||
|
return {Qualifier::NoPerspective, Qualifier::Centroid};
|
||||||
|
case IR::Attribute::BaryCoordNoPerspSample:
|
||||||
|
return {Qualifier::NoPerspective, Qualifier::Sample};
|
||||||
|
case IR::Attribute::BaryCoordSmooth:
|
||||||
|
return {Qualifier::Smooth, Qualifier::None};
|
||||||
|
case IR::Attribute::BaryCoordSmoothCentroid:
|
||||||
|
return {Qualifier::Smooth, Qualifier::Centroid};
|
||||||
|
case IR::Attribute::BaryCoordSmoothSample:
|
||||||
|
return {Qualifier::Smooth, Qualifier::Sample};
|
||||||
|
default:
|
||||||
|
UNREACHABLE_MSG("Unhandled barycentric attribute {}", NameOf(attribute));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::EmitVectorInterpolation(const GcnInst& inst) {
|
void Translator::EmitVectorInterpolation(const GcnInst& inst) {
|
||||||
switch (inst.opcode) {
|
switch (inst.opcode) {
|
||||||
// VINTRP
|
// VINTRP
|
||||||
case Opcode::V_INTERP_P1_F32:
|
case Opcode::V_INTERP_P1_F32:
|
||||||
return;
|
return V_INTERP_P1_F32(inst);
|
||||||
case Opcode::V_INTERP_P2_F32:
|
case Opcode::V_INTERP_P2_F32:
|
||||||
return V_INTERP_P2_F32(inst);
|
return V_INTERP_P2_F32(inst);
|
||||||
case Opcode::V_INTERP_MOV_F32:
|
case Opcode::V_INTERP_MOV_F32:
|
||||||
@ -21,19 +42,57 @@ void Translator::EmitVectorInterpolation(const GcnInst& inst) {
|
|||||||
|
|
||||||
// VINTRP
|
// VINTRP
|
||||||
|
|
||||||
|
void Translator::V_INTERP_P1_F32(const GcnInst& inst) {
|
||||||
|
if (!profile.needs_manual_interpolation) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// VDST = P10 * VSRC + P0
|
||||||
|
const u32 attr_index = inst.control.vintrp.attr;
|
||||||
|
const IR::Attribute attrib = IR::Attribute::Param0 + attr_index;
|
||||||
|
const IR::F32 p0 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 0);
|
||||||
|
const IR::F32 p1 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 1);
|
||||||
|
const IR::F32 i = GetSrc<IR::F32>(inst.src[0]);
|
||||||
|
const IR::F32 result = ir.FPFma(ir.FPSub(p1, p0), i, p0);
|
||||||
|
SetDst(inst.dst[0], result);
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
|
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
|
||||||
const u32 attr_index = inst.control.vintrp.attr;
|
const u32 attr_index = inst.control.vintrp.attr;
|
||||||
const auto& attr = runtime_info.fs_info.inputs.at(attr_index);
|
const IR::Attribute attrib = IR::Attribute::Param0 + attr_index;
|
||||||
info.interp_qualifiers[attr_index] = vgpr_to_interp[inst.src[0].code];
|
const auto& attr = runtime_info.fs_info.inputs[attr_index];
|
||||||
const IR::Attribute attrib{IR::Attribute::Param0 + attr_index};
|
auto& interp = info.fs_interpolation[attr_index];
|
||||||
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
ASSERT(!attr.IsDefault() && !attr.is_flat);
|
||||||
|
if (!profile.needs_manual_interpolation) {
|
||||||
|
interp = GetInterpolation(vgpr_to_interp[inst.src[0].code]);
|
||||||
|
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// VDST = P20 * VSRC + VDST
|
||||||
|
const IR::F32 p0 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 0);
|
||||||
|
const IR::F32 p2 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 2);
|
||||||
|
const IR::F32 j = GetSrc<IR::F32>(inst.src[0]);
|
||||||
|
const IR::F32 result = ir.FPFma(ir.FPSub(p2, p0), j, GetSrc<IR::F32>(inst.dst[0]));
|
||||||
|
interp.primary = Qualifier::PerVertex;
|
||||||
|
SetDst(inst.dst[0], result);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
|
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
|
||||||
const u32 attr_index = inst.control.vintrp.attr;
|
const u32 attr_index = inst.control.vintrp.attr;
|
||||||
const auto& attr = runtime_info.fs_info.inputs.at(attr_index);
|
const IR::Attribute attrib = IR::Attribute::Param0 + attr_index;
|
||||||
const IR::Attribute attrib{IR::Attribute::Param0 + attr_index};
|
const auto& attr = runtime_info.fs_info.inputs[attr_index];
|
||||||
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
auto& interp = info.fs_interpolation[attr_index];
|
||||||
|
ASSERT(attr.is_flat);
|
||||||
|
if (profile.supports_amd_shader_explicit_vertex_parameter ||
|
||||||
|
(profile.supports_fragment_shader_barycentric &&
|
||||||
|
!profile.has_incomplete_fragment_shader_barycentric)) {
|
||||||
|
// VSRC 0=P10, 1=P20, 2=P0
|
||||||
|
interp.primary = Qualifier::PerVertex;
|
||||||
|
SetDst(inst.dst[0],
|
||||||
|
ir.GetAttribute(attrib, inst.control.vintrp.chan, (inst.src[0].code + 1) % 3));
|
||||||
|
} else {
|
||||||
|
interp.primary = Qualifier::Flat;
|
||||||
|
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <span>
|
#include <span>
|
||||||
@ -113,17 +114,13 @@ struct FMaskResource {
|
|||||||
using FMaskResourceList = boost::container::small_vector<FMaskResource, NumFMasks>;
|
using FMaskResourceList = boost::container::small_vector<FMaskResource, NumFMasks>;
|
||||||
|
|
||||||
struct PushData {
|
struct PushData {
|
||||||
static constexpr u32 Step0Index = 0;
|
static constexpr u32 XOffsetIndex = 0;
|
||||||
static constexpr u32 Step1Index = 1;
|
static constexpr u32 YOffsetIndex = 1;
|
||||||
static constexpr u32 XOffsetIndex = 2;
|
static constexpr u32 XScaleIndex = 2;
|
||||||
static constexpr u32 YOffsetIndex = 3;
|
static constexpr u32 YScaleIndex = 3;
|
||||||
static constexpr u32 XScaleIndex = 4;
|
static constexpr u32 UdRegsIndex = 4;
|
||||||
static constexpr u32 YScaleIndex = 5;
|
|
||||||
static constexpr u32 UdRegsIndex = 6;
|
|
||||||
static constexpr u32 BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4;
|
static constexpr u32 BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4;
|
||||||
|
|
||||||
u32 step0;
|
|
||||||
u32 step1;
|
|
||||||
float xoffset;
|
float xoffset;
|
||||||
float yoffset;
|
float yoffset;
|
||||||
float xscale;
|
float xscale;
|
||||||
@ -139,6 +136,16 @@ struct PushData {
|
|||||||
static_assert(sizeof(PushData) <= 128,
|
static_assert(sizeof(PushData) <= 128,
|
||||||
"PushData size is greater than minimum size guaranteed by Vulkan spec");
|
"PushData size is greater than minimum size guaranteed by Vulkan spec");
|
||||||
|
|
||||||
|
enum class Qualifier : u8 {
|
||||||
|
None,
|
||||||
|
Smooth,
|
||||||
|
NoPerspective,
|
||||||
|
PerVertex,
|
||||||
|
Flat,
|
||||||
|
Centroid,
|
||||||
|
Sample,
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Contains general information generated by the shader recompiler for an input program.
|
* Contains general information generated by the shader recompiler for an input program.
|
||||||
*/
|
*/
|
||||||
@ -198,7 +205,11 @@ struct Info {
|
|||||||
PersistentSrtInfo srt_info;
|
PersistentSrtInfo srt_info;
|
||||||
std::vector<u32> flattened_ud_buf;
|
std::vector<u32> flattened_ud_buf;
|
||||||
|
|
||||||
std::array<IR::Interpolation, 32> interp_qualifiers{};
|
struct Interpolation {
|
||||||
|
Qualifier primary;
|
||||||
|
Qualifier auxiliary;
|
||||||
|
};
|
||||||
|
std::array<Interpolation, IR::NumParams> fs_interpolation{};
|
||||||
|
|
||||||
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
|
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
|
||||||
s32 tess_consts_dword_offset = -1;
|
s32 tess_consts_dword_offset = -1;
|
||||||
@ -211,10 +222,9 @@ struct Info {
|
|||||||
VAddr pgm_base;
|
VAddr pgm_base;
|
||||||
bool has_storage_images{};
|
bool has_storage_images{};
|
||||||
bool has_discard{};
|
bool has_discard{};
|
||||||
|
bool has_bitwise_xor{};
|
||||||
bool has_image_gather{};
|
bool has_image_gather{};
|
||||||
bool has_image_query{};
|
bool has_image_query{};
|
||||||
bool has_perspective_interp{};
|
|
||||||
bool has_linear_interp{};
|
|
||||||
bool uses_buffer_atomic_float_min_max{};
|
bool uses_buffer_atomic_float_min_max{};
|
||||||
bool uses_image_atomic_float_min_max{};
|
bool uses_image_atomic_float_min_max{};
|
||||||
bool uses_lane_id{};
|
bool uses_lane_id{};
|
||||||
|
@ -100,22 +100,50 @@ std::string NameOf(Attribute attribute) {
|
|||||||
return "Param30";
|
return "Param30";
|
||||||
case Attribute::Param31:
|
case Attribute::Param31:
|
||||||
return "Param31";
|
return "Param31";
|
||||||
|
case Attribute::ClipDistance:
|
||||||
|
return "ClipDistanace";
|
||||||
|
case Attribute::CullDistance:
|
||||||
|
return "CullDistance";
|
||||||
|
case Attribute::RenderTargetId:
|
||||||
|
return "RenderTargetId";
|
||||||
|
case Attribute::ViewportId:
|
||||||
|
return "ViewportId";
|
||||||
case Attribute::VertexId:
|
case Attribute::VertexId:
|
||||||
return "VertexId";
|
return "VertexId";
|
||||||
case Attribute::InstanceId:
|
|
||||||
return "InstanceId";
|
|
||||||
case Attribute::PrimitiveId:
|
case Attribute::PrimitiveId:
|
||||||
return "PrimitiveId";
|
return "PrimitiveId";
|
||||||
case Attribute::FragCoord:
|
case Attribute::InstanceId:
|
||||||
return "FragCoord";
|
return "InstanceId";
|
||||||
case Attribute::IsFrontFace:
|
case Attribute::IsFrontFace:
|
||||||
return "IsFrontFace";
|
return "IsFrontFace";
|
||||||
|
case Attribute::SampleIndex:
|
||||||
|
return "SampleIndex";
|
||||||
|
case Attribute::GlobalInvocationId:
|
||||||
|
return "GlobalInvocationId";
|
||||||
case Attribute::WorkgroupId:
|
case Attribute::WorkgroupId:
|
||||||
return "WorkgroupId";
|
return "WorkgroupId";
|
||||||
|
case Attribute::WorkgroupIndex:
|
||||||
|
return "WorkgroupIndex";
|
||||||
case Attribute::LocalInvocationId:
|
case Attribute::LocalInvocationId:
|
||||||
return "LocalInvocationId";
|
return "LocalInvocationId";
|
||||||
case Attribute::LocalInvocationIndex:
|
case Attribute::LocalInvocationIndex:
|
||||||
return "LocalInvocationIndex";
|
return "LocalInvocationIndex";
|
||||||
|
case Attribute::FragCoord:
|
||||||
|
return "FragCoord";
|
||||||
|
case Attribute::BaryCoordNoPersp:
|
||||||
|
return "BaryCoordNoPersp";
|
||||||
|
case Attribute::BaryCoordNoPerspCentroid:
|
||||||
|
return "BaryCoordNoPerspCentroid";
|
||||||
|
case Attribute::BaryCoordNoPerspSample:
|
||||||
|
return "BaryCoordNoPerspSample";
|
||||||
|
case Attribute::BaryCoordSmooth:
|
||||||
|
return "BaryCoordSmooth";
|
||||||
|
case Attribute::BaryCoordSmoothCentroid:
|
||||||
|
return "BaryCoordSmoothCentroid";
|
||||||
|
case Attribute::BaryCoordSmoothSample:
|
||||||
|
return "BaryCoordSmoothSample";
|
||||||
|
case Attribute::BaryCoordPullModel:
|
||||||
|
return "BaryCoordPullModel";
|
||||||
case Attribute::InvocationId:
|
case Attribute::InvocationId:
|
||||||
return "InvocationId";
|
return "InvocationId";
|
||||||
case Attribute::PatchVertices:
|
case Attribute::PatchVertices:
|
||||||
|
@ -73,26 +73,21 @@ enum class Attribute : u64 {
|
|||||||
LocalInvocationId = 76,
|
LocalInvocationId = 76,
|
||||||
LocalInvocationIndex = 77,
|
LocalInvocationIndex = 77,
|
||||||
FragCoord = 78,
|
FragCoord = 78,
|
||||||
InstanceId0 = 79, // step rate 0
|
BaryCoordNoPersp = 79,
|
||||||
InstanceId1 = 80, // step rate 1
|
BaryCoordNoPerspCentroid = 80,
|
||||||
InvocationId = 81, // TCS id in output patch and instanced geometry shader id
|
BaryCoordNoPerspSample = 81,
|
||||||
PatchVertices = 82,
|
BaryCoordSmooth = 82,
|
||||||
TessellationEvaluationPointU = 83,
|
BaryCoordSmoothCentroid = 83,
|
||||||
TessellationEvaluationPointV = 84,
|
BaryCoordSmoothSample = 84,
|
||||||
PackedHullInvocationInfo = 85, // contains patch id within the VGT and invocation ID
|
BaryCoordPullModel = 85,
|
||||||
|
InvocationId = 86, // TCS id in output patch and instanced geometry shader id
|
||||||
|
PatchVertices = 87,
|
||||||
|
TessellationEvaluationPointU = 88,
|
||||||
|
TessellationEvaluationPointV = 89,
|
||||||
|
PackedHullInvocationInfo = 90, // contains patch id within the VGT and invocation ID
|
||||||
Max,
|
Max,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class Interpolation {
|
|
||||||
Invalid = 0,
|
|
||||||
PerspectiveSample = 1,
|
|
||||||
PerspectiveCenter = 2,
|
|
||||||
PerspectiveCentroid = 3,
|
|
||||||
LinearSample = 4,
|
|
||||||
LinearCenter = 5,
|
|
||||||
LinearCentroid = 6,
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
|
constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
|
||||||
constexpr size_t NumRenderTargets = 8;
|
constexpr size_t NumRenderTargets = 8;
|
||||||
constexpr size_t NumParams = 32;
|
constexpr size_t NumParams = 32;
|
||||||
@ -114,13 +109,9 @@ constexpr bool IsMrt(Attribute attribute) noexcept {
|
|||||||
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
|
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr bool IsLinear(Interpolation interp) noexcept {
|
constexpr bool IsBarycentricCoord(Attribute attribute) noexcept {
|
||||||
return interp >= Interpolation::LinearSample && interp <= Interpolation::LinearCentroid;
|
return attribute >= Attribute::BaryCoordNoPersp &&
|
||||||
}
|
attribute <= Attribute::BaryCoordSmoothSample;
|
||||||
|
|
||||||
constexpr bool IsPerspective(Interpolation interp) noexcept {
|
|
||||||
return interp >= Interpolation::PerspectiveSample &&
|
|
||||||
interp <= Interpolation::PerspectiveCentroid;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] std::string NameOf(Attribute attribute);
|
[[nodiscard]] std::string NameOf(Attribute attribute);
|
||||||
|
@ -255,8 +255,8 @@ void IREmitter::SetM0(const U32& value) {
|
|||||||
Inst(Opcode::SetM0, value);
|
Inst(Opcode::SetM0, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) {
|
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
|
||||||
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), index);
|
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index));
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
|
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
|
||||||
|
@ -81,8 +81,7 @@ public:
|
|||||||
|
|
||||||
[[nodiscard]] U1 Condition(IR::Condition cond);
|
[[nodiscard]] U1 Condition(IR::Condition cond);
|
||||||
|
|
||||||
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0,
|
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
|
||||||
IR::Value index = IR::Value(u32(0u)));
|
|
||||||
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
|
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
|
||||||
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
|
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
|
||||||
|
|
||||||
|
@ -191,7 +191,7 @@ static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info,
|
|||||||
static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
|
static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
|
||||||
Xbyak::CodeGenerator& c = g_srt_codegen;
|
Xbyak::CodeGenerator& c = g_srt_codegen;
|
||||||
|
|
||||||
if (info.srt_info.srt_reservations.empty() && pass_info.srt_roots.empty()) {
|
if (pass_info.srt_roots.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -205,29 +205,7 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>();
|
info.srt_info.walker_func = c.getCurr<PFN_SrtWalker>();
|
||||||
|
|
||||||
pass_info.dst_off_dw = NumUserDataRegs;
|
pass_info.dst_off_dw = NumUserDataRegs;
|
||||||
|
|
||||||
// Special case for V# step rate buffers in fetch shader
|
|
||||||
for (const auto [sgpr_base, dword_offset, num_dwords] : info.srt_info.srt_reservations) {
|
|
||||||
// get pointer to V#
|
|
||||||
if (sgpr_base != IR::NumScalarRegs) {
|
|
||||||
PushPtr(c, sgpr_base);
|
|
||||||
}
|
|
||||||
u32 src_off = dword_offset << 2;
|
|
||||||
|
|
||||||
for (auto j = 0; j < num_dwords; j++) {
|
|
||||||
c.mov(r11d, ptr[rdi + src_off]);
|
|
||||||
c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d);
|
|
||||||
|
|
||||||
src_off += 4;
|
|
||||||
++pass_info.dst_off_dw;
|
|
||||||
}
|
|
||||||
if (sgpr_base != IR::NumScalarRegs) {
|
|
||||||
PopPtr(c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
|
ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
|
||||||
|
|
||||||
for (const auto& [sgpr_base, root] : pass_info.srt_roots) {
|
for (const auto& [sgpr_base, root] : pass_info.srt_roots) {
|
||||||
|
@ -455,11 +455,12 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
|
|||||||
// Read image sharp.
|
// Read image sharp.
|
||||||
const auto tsharp = TrackSharp(tsharp_handle, info);
|
const auto tsharp = TrackSharp(tsharp_handle, info);
|
||||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||||
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
|
const bool is_atomic = IsImageAtomicInstruction(inst);
|
||||||
|
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite || is_atomic;
|
||||||
const ImageResource image_res = {
|
const ImageResource image_res = {
|
||||||
.sharp_idx = tsharp,
|
.sharp_idx = tsharp,
|
||||||
.is_depth = bool(inst_info.is_depth),
|
.is_depth = bool(inst_info.is_depth),
|
||||||
.is_atomic = IsImageAtomicInstruction(inst),
|
.is_atomic = is_atomic,
|
||||||
.is_array = bool(inst_info.is_array),
|
.is_array = bool(inst_info.is_array),
|
||||||
.is_written = is_written,
|
.is_written = is_written,
|
||||||
.is_r128 = bool(inst_info.is_r128),
|
.is_r128 = bool(inst_info.is_r128),
|
||||||
|
@ -33,12 +33,9 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||||||
bool is_composite = opcode == IR::Opcode::WriteSharedU64;
|
bool is_composite = opcode == IR::Opcode::WriteSharedU64;
|
||||||
u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
|
u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
|
||||||
|
|
||||||
u32 offset = 0;
|
ASSERT(inst.Arg(0).IsImmediate());
|
||||||
const auto* addr = inst.Arg(0).InstRecursive();
|
|
||||||
if (addr->GetOpcode() == IR::Opcode::IAdd32) {
|
u32 offset = inst.Arg(0).U32();
|
||||||
ASSERT(addr->Arg(1).IsImmediate());
|
|
||||||
offset = addr->Arg(1).U32();
|
|
||||||
}
|
|
||||||
IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()})
|
IR::Value data = is_composite ? ir.UnpackUint2x32(IR::U64{inst.Arg(1).Resolve()})
|
||||||
: inst.Arg(1).Resolve();
|
: inst.Arg(1).Resolve();
|
||||||
for (s32 i = 0; i < num_components; i++) {
|
for (s32 i = 0; i < num_components; i++) {
|
||||||
@ -116,7 +113,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto shl_inst = inst.Arg(1).TryInstRecursive();
|
const auto shl_inst = inst.Arg(1).TryInstRecursive();
|
||||||
const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2);
|
const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
|
||||||
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
|
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
|
||||||
const auto bucket = offset.Resolve().U32() / 256u;
|
const auto bucket = offset.Resolve().U32() / 256u;
|
||||||
const auto attrib = bucket < 4 ? IR::Attribute::Position0
|
const auto attrib = bucket < 4 ? IR::Attribute::Position0
|
||||||
|
@ -95,6 +95,9 @@ void Visit(Info& info, const IR::Inst& inst) {
|
|||||||
case IR::Opcode::DiscardCond:
|
case IR::Opcode::DiscardCond:
|
||||||
info.has_discard = true;
|
info.has_discard = true;
|
||||||
break;
|
break;
|
||||||
|
case IR::Opcode::BitwiseXor32:
|
||||||
|
info.has_bitwise_xor = true;
|
||||||
|
break;
|
||||||
case IR::Opcode::ImageGather:
|
case IR::Opcode::ImageGather:
|
||||||
case IR::Opcode::ImageGatherDref:
|
case IR::Opcode::ImageGatherDref:
|
||||||
info.has_image_gather = true;
|
info.has_image_gather = true;
|
||||||
|
@ -20,18 +20,7 @@ struct PersistentSrtInfo {
|
|||||||
};
|
};
|
||||||
|
|
||||||
PFN_SrtWalker walker_func{};
|
PFN_SrtWalker walker_func{};
|
||||||
boost::container::small_vector<SrtSharpReservation, 2> srt_reservations;
|
|
||||||
u32 flattened_bufsize_dw = 16; // NumUserDataRegs
|
u32 flattened_bufsize_dw = 16; // NumUserDataRegs
|
||||||
|
|
||||||
// Special case for fetch shaders because we don't generate IR to read from step rate buffers,
|
|
||||||
// so we won't see usage with GetUserData/ReadConst.
|
|
||||||
// Reserve space in the flattened buffer for a sharp ahead of time
|
|
||||||
u32 ReserveSharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) {
|
|
||||||
u32 rv = flattened_bufsize_dw;
|
|
||||||
srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords);
|
|
||||||
flattened_bufsize_dw += num_dwords;
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
@ -10,16 +10,10 @@ namespace Shader {
|
|||||||
struct Profile {
|
struct Profile {
|
||||||
u32 supported_spirv{0x00010000};
|
u32 supported_spirv{0x00010000};
|
||||||
u32 subgroup_size{};
|
u32 subgroup_size{};
|
||||||
bool unified_descriptor_binding{};
|
|
||||||
bool support_descriptor_aliasing{};
|
|
||||||
bool support_int8{};
|
bool support_int8{};
|
||||||
bool support_int16{};
|
bool support_int16{};
|
||||||
bool support_int64{};
|
bool support_int64{};
|
||||||
bool support_float64{};
|
bool support_float64{};
|
||||||
bool support_vertex_instance_id{};
|
|
||||||
bool support_float_controls{};
|
|
||||||
bool support_separate_denorm_behavior{};
|
|
||||||
bool support_separate_rounding_mode{};
|
|
||||||
bool support_fp32_denorm_preserve{};
|
bool support_fp32_denorm_preserve{};
|
||||||
bool support_fp32_denorm_flush{};
|
bool support_fp32_denorm_flush{};
|
||||||
bool support_fp32_round_to_zero{};
|
bool support_fp32_round_to_zero{};
|
||||||
@ -33,6 +27,9 @@ struct Profile {
|
|||||||
bool supports_buffer_int64_atomics{};
|
bool supports_buffer_int64_atomics{};
|
||||||
bool supports_shared_int64_atomics{};
|
bool supports_shared_int64_atomics{};
|
||||||
bool supports_workgroup_explicit_memory_layout{};
|
bool supports_workgroup_explicit_memory_layout{};
|
||||||
|
bool supports_amd_shader_explicit_vertex_parameter{};
|
||||||
|
bool supports_fragment_shader_barycentric{};
|
||||||
|
bool has_incomplete_fragment_shader_barycentric{};
|
||||||
bool has_broken_spirv_clamp{};
|
bool has_broken_spirv_clamp{};
|
||||||
bool lower_left_origin_mode{};
|
bool lower_left_origin_mode{};
|
||||||
bool needs_manual_interpolation{};
|
bool needs_manual_interpolation{};
|
||||||
|
@ -42,7 +42,6 @@ constexpr u32 MaxStageTypes = static_cast<u32>(LogicalStage::NumLogicalStages);
|
|||||||
|
|
||||||
struct LocalRuntimeInfo {
|
struct LocalRuntimeInfo {
|
||||||
u32 ls_stride;
|
u32 ls_stride;
|
||||||
bool links_with_tcs;
|
|
||||||
|
|
||||||
auto operator<=>(const LocalRuntimeInfo&) const noexcept = default;
|
auto operator<=>(const LocalRuntimeInfo&) const noexcept = default;
|
||||||
};
|
};
|
||||||
@ -85,6 +84,8 @@ struct VertexRuntimeInfo {
|
|||||||
std::array<VsOutputMap, 3> outputs;
|
std::array<VsOutputMap, 3> outputs;
|
||||||
bool emulate_depth_negative_one_to_one{};
|
bool emulate_depth_negative_one_to_one{};
|
||||||
bool clip_disable{};
|
bool clip_disable{};
|
||||||
|
u32 step_rate_0;
|
||||||
|
u32 step_rate_1;
|
||||||
// Domain
|
// Domain
|
||||||
AmdGpu::TessellationType tess_type;
|
AmdGpu::TessellationType tess_type;
|
||||||
AmdGpu::TessellationTopology tess_topology;
|
AmdGpu::TessellationTopology tess_topology;
|
||||||
@ -96,7 +97,8 @@ struct VertexRuntimeInfo {
|
|||||||
clip_disable == other.clip_disable && tess_type == other.tess_type &&
|
clip_disable == other.clip_disable && tess_type == other.tess_type &&
|
||||||
tess_topology == other.tess_topology &&
|
tess_topology == other.tess_topology &&
|
||||||
tess_partitioning == other.tess_partitioning &&
|
tess_partitioning == other.tess_partitioning &&
|
||||||
hs_output_cp_stride == other.hs_output_cp_stride;
|
hs_output_cp_stride == other.hs_output_cp_stride &&
|
||||||
|
step_rate_0 == other.step_rate_0 && step_rate_1 == other.step_rate_1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
|
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
|
||||||
|
@ -13,7 +13,7 @@
|
|||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
struct VsAttribSpecialization {
|
struct VsAttribSpecialization {
|
||||||
s32 num_components{};
|
u32 divisor{};
|
||||||
AmdGpu::NumberClass num_class{};
|
AmdGpu::NumberClass num_class{};
|
||||||
AmdGpu::CompMapping dst_select{};
|
AmdGpu::CompMapping dst_select{};
|
||||||
|
|
||||||
@ -74,13 +74,13 @@ struct SamplerSpecialization {
|
|||||||
* after the first compilation of a module.
|
* after the first compilation of a module.
|
||||||
*/
|
*/
|
||||||
struct StageSpecialization {
|
struct StageSpecialization {
|
||||||
static constexpr size_t MaxStageResources = 64;
|
static constexpr size_t MaxStageResources = 128;
|
||||||
|
|
||||||
const Shader::Info* info;
|
const Shader::Info* info;
|
||||||
RuntimeInfo runtime_info;
|
RuntimeInfo runtime_info;
|
||||||
|
std::bitset<MaxStageResources> bitset{};
|
||||||
std::optional<Gcn::FetchShaderData> fetch_shader_data{};
|
std::optional<Gcn::FetchShaderData> fetch_shader_data{};
|
||||||
boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
|
boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
|
||||||
std::bitset<MaxStageResources> bitset{};
|
|
||||||
boost::container::small_vector<BufferSpecialization, 16> buffers;
|
boost::container::small_vector<BufferSpecialization, 16> buffers;
|
||||||
boost::container::small_vector<ImageSpecialization, 16> images;
|
boost::container::small_vector<ImageSpecialization, 16> images;
|
||||||
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
|
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
|
||||||
@ -94,10 +94,16 @@ struct StageSpecialization {
|
|||||||
if (info_.stage == Stage::Vertex && fetch_shader_data) {
|
if (info_.stage == Stage::Vertex && fetch_shader_data) {
|
||||||
// Specialize shader on VS input number types to follow spec.
|
// Specialize shader on VS input number types to follow spec.
|
||||||
ForEachSharp(vs_attribs, fetch_shader_data->attributes,
|
ForEachSharp(vs_attribs, fetch_shader_data->attributes,
|
||||||
[&profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
[&profile_, this](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||||
spec.num_components = desc.UsesStepRates()
|
using InstanceIdType = Shader::Gcn::VertexAttribute::InstanceIdType;
|
||||||
? AmdGpu::NumComponents(sharp.GetDataFmt())
|
if (const auto step_rate = desc.GetStepRate();
|
||||||
: 0;
|
step_rate != InstanceIdType::None) {
|
||||||
|
spec.divisor = step_rate == InstanceIdType::OverStepRate0
|
||||||
|
? runtime_info.vs_info.step_rate_0
|
||||||
|
: (step_rate == InstanceIdType::OverStepRate1
|
||||||
|
? runtime_info.vs_info.step_rate_1
|
||||||
|
: 1);
|
||||||
|
}
|
||||||
spec.num_class = profile_.support_legacy_vertex_attributes
|
spec.num_class = profile_.support_legacy_vertex_attributes
|
||||||
? AmdGpu::NumberClass{}
|
? AmdGpu::NumberClass{}
|
||||||
: AmdGpu::GetNumberClass(sharp.GetNumberFmt());
|
: AmdGpu::GetNumberClass(sharp.GetNumberFmt());
|
||||||
|
@ -304,6 +304,14 @@ struct Liverpool {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct LineControl {
|
||||||
|
u32 width_fixed_point;
|
||||||
|
|
||||||
|
float Width() const {
|
||||||
|
return static_cast<float>(width_fixed_point) / 8.0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct ModeControl {
|
struct ModeControl {
|
||||||
s32 msaa_enable : 1;
|
s32 msaa_enable : 1;
|
||||||
s32 vport_scissor_enable : 1;
|
s32 vport_scissor_enable : 1;
|
||||||
@ -513,9 +521,16 @@ struct Liverpool {
|
|||||||
BitField<19, 1, ClipSpace> clip_space;
|
BitField<19, 1, ClipSpace> clip_space;
|
||||||
BitField<21, 1, PrimKillCond> vtx_kill_or;
|
BitField<21, 1, PrimKillCond> vtx_kill_or;
|
||||||
BitField<22, 1, u32> dx_rasterization_kill;
|
BitField<22, 1, u32> dx_rasterization_kill;
|
||||||
BitField<23, 1, u32> dx_linear_attr_clip_enable;
|
BitField<24, 1, u32> dx_linear_attr_clip_enable;
|
||||||
BitField<26, 1, u32> zclip_near_disable;
|
BitField<26, 1, u32> zclip_near_disable;
|
||||||
BitField<26, 1, u32> zclip_far_disable;
|
BitField<27, 1, u32> zclip_far_disable;
|
||||||
|
|
||||||
|
bool ZclipEnable() const {
|
||||||
|
if (zclip_near_disable != zclip_far_disable) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return !zclip_near_disable;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class PolygonMode : u32 {
|
enum class PolygonMode : u32 {
|
||||||
@ -738,12 +753,7 @@ struct Liverpool {
|
|||||||
u32 data_w;
|
u32 data_w;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BlendConstants {
|
using BlendConstants = std::array<float, 4>;
|
||||||
float red;
|
|
||||||
float green;
|
|
||||||
float blue;
|
|
||||||
float alpha;
|
|
||||||
};
|
|
||||||
|
|
||||||
union BlendControl {
|
union BlendControl {
|
||||||
enum class BlendFactor : u32 {
|
enum class BlendFactor : u32 {
|
||||||
@ -796,11 +806,29 @@ struct Liverpool {
|
|||||||
Err = 4u,
|
Err = 4u,
|
||||||
FmaskDecompress = 5u,
|
FmaskDecompress = 5u,
|
||||||
};
|
};
|
||||||
|
enum class LogicOp : u32 {
|
||||||
|
Clear = 0x00,
|
||||||
|
Nor = 0x11,
|
||||||
|
AndInverted = 0x22,
|
||||||
|
CopyInverted = 0x33,
|
||||||
|
AndReverse = 0x44,
|
||||||
|
Invert = 0x55,
|
||||||
|
Xor = 0x66,
|
||||||
|
Nand = 0x77,
|
||||||
|
And = 0x88,
|
||||||
|
Equiv = 0x99,
|
||||||
|
Noop = 0xAA,
|
||||||
|
OrInverted = 0xBB,
|
||||||
|
Copy = 0xCC,
|
||||||
|
OrReverse = 0xDD,
|
||||||
|
Or = 0xEE,
|
||||||
|
Set = 0xFF,
|
||||||
|
};
|
||||||
|
|
||||||
BitField<0, 1, u32> disable_dual_quad;
|
BitField<0, 1, u32> disable_dual_quad;
|
||||||
BitField<3, 1, u32> degamma_enable;
|
BitField<3, 1, u32> degamma_enable;
|
||||||
BitField<4, 3, OperationMode> mode;
|
BitField<4, 3, OperationMode> mode;
|
||||||
BitField<16, 8, u32> rop3;
|
BitField<16, 8, LogicOp> rop3;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ColorBuffer {
|
struct ColorBuffer {
|
||||||
@ -981,7 +1009,6 @@ struct Liverpool {
|
|||||||
return RemapSwizzle(info.format, mrt_swizzle);
|
return RemapSwizzle(info.format, mrt_swizzle);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
|
||||||
[[nodiscard]] NumberFormat GetFixedNumberFormat() const {
|
[[nodiscard]] NumberFormat GetFixedNumberFormat() const {
|
||||||
// There is a small difference between T# and CB number types, account for it.
|
// There is a small difference between T# and CB number types, account for it.
|
||||||
return info.number_type == NumberFormat::SnormNz ? NumberFormat::Srgb
|
return info.number_type == NumberFormat::SnormNz ? NumberFormat::Srgb
|
||||||
@ -1369,7 +1396,9 @@ struct Liverpool {
|
|||||||
PolygonControl polygon_control;
|
PolygonControl polygon_control;
|
||||||
ViewportControl viewport_control;
|
ViewportControl viewport_control;
|
||||||
VsOutputControl vs_output_control;
|
VsOutputControl vs_output_control;
|
||||||
INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1);
|
INSERT_PADDING_WORDS(0xA287 - 0xA207 - 6);
|
||||||
|
LineControl line_control;
|
||||||
|
INSERT_PADDING_WORDS(4);
|
||||||
HsTessFactorClamp hs_clamp;
|
HsTessFactorClamp hs_clamp;
|
||||||
INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2);
|
INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2);
|
||||||
GsMode vgt_gs_mode;
|
GsMode vgt_gs_mode;
|
||||||
@ -1695,6 +1724,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
|
|||||||
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
|
||||||
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
|
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
|
||||||
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
|
||||||
|
static_assert(GFX6_3D_REG_INDEX(line_control) == 0xA282);
|
||||||
static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
|
static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
|
||||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
|
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
|
||||||
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
|
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
|
||||||
|
@ -198,10 +198,13 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
|
|||||||
}
|
}
|
||||||
|
|
||||||
void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
|
void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) {
|
||||||
|
const auto& regs = liverpool->regs;
|
||||||
Vulkan::VertexInputs<vk::VertexInputAttributeDescription2EXT> attributes;
|
Vulkan::VertexInputs<vk::VertexInputAttributeDescription2EXT> attributes;
|
||||||
Vulkan::VertexInputs<vk::VertexInputBindingDescription2EXT> bindings;
|
Vulkan::VertexInputs<vk::VertexInputBindingDescription2EXT> bindings;
|
||||||
|
Vulkan::VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT> divisors;
|
||||||
Vulkan::VertexInputs<AmdGpu::Buffer> guest_buffers;
|
Vulkan::VertexInputs<AmdGpu::Buffer> guest_buffers;
|
||||||
pipeline.GetVertexInputs(attributes, bindings, guest_buffers);
|
pipeline.GetVertexInputs(attributes, bindings, divisors, guest_buffers,
|
||||||
|
regs.vgt_instance_step_rate_0, regs.vgt_instance_step_rate_1);
|
||||||
|
|
||||||
if (instance.IsVertexInputDynamicState()) {
|
if (instance.IsVertexInputDynamicState()) {
|
||||||
// Update current vertex inputs.
|
// Update current vertex inputs.
|
||||||
|
@ -245,6 +245,46 @@ vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vk::LogicOp LogicOp(Liverpool::ColorControl::LogicOp logic_op) {
|
||||||
|
using LogicOp = Liverpool::ColorControl::LogicOp;
|
||||||
|
switch (logic_op) {
|
||||||
|
case LogicOp::Clear:
|
||||||
|
return vk::LogicOp::eClear;
|
||||||
|
case LogicOp::Nor:
|
||||||
|
return vk::LogicOp::eNor;
|
||||||
|
case LogicOp::AndInverted:
|
||||||
|
return vk::LogicOp::eAndInverted;
|
||||||
|
case LogicOp::CopyInverted:
|
||||||
|
return vk::LogicOp::eCopyInverted;
|
||||||
|
case LogicOp::AndReverse:
|
||||||
|
return vk::LogicOp::eAndReverse;
|
||||||
|
case LogicOp::Invert:
|
||||||
|
return vk::LogicOp::eInvert;
|
||||||
|
case LogicOp::Xor:
|
||||||
|
return vk::LogicOp::eXor;
|
||||||
|
case LogicOp::Nand:
|
||||||
|
return vk::LogicOp::eNand;
|
||||||
|
case LogicOp::And:
|
||||||
|
return vk::LogicOp::eAnd;
|
||||||
|
case LogicOp::Equiv:
|
||||||
|
return vk::LogicOp::eEquivalent;
|
||||||
|
case LogicOp::Noop:
|
||||||
|
return vk::LogicOp::eNoOp;
|
||||||
|
case LogicOp::OrInverted:
|
||||||
|
return vk::LogicOp::eOrInverted;
|
||||||
|
case LogicOp::Copy:
|
||||||
|
return vk::LogicOp::eCopy;
|
||||||
|
case LogicOp::OrReverse:
|
||||||
|
return vk::LogicOp::eOrReverse;
|
||||||
|
case LogicOp::Or:
|
||||||
|
return vk::LogicOp::eOr;
|
||||||
|
case LogicOp::Set:
|
||||||
|
return vk::LogicOp::eSet;
|
||||||
|
default:
|
||||||
|
UNREACHABLE_MSG("Unknown logic op {}", u32(logic_op));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// https://github.com/chaotic-cx/mesa-mirror/blob/0954afff5/src/amd/vulkan/radv_sampler.c#L21
|
// https://github.com/chaotic-cx/mesa-mirror/blob/0954afff5/src/amd/vulkan/radv_sampler.c#L21
|
||||||
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode) {
|
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode) {
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
@ -767,8 +807,8 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat
|
|||||||
|
|
||||||
vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
|
vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
|
||||||
const auto comp_swizzle = color_buffer.Swizzle();
|
const auto comp_swizzle = color_buffer.Swizzle();
|
||||||
const auto format = color_buffer.GetDataFmt();
|
const auto format = color_buffer.info.format.Value();
|
||||||
const auto number_type = color_buffer.GetNumberFmt();
|
const auto number_type = color_buffer.GetFixedNumberFormat();
|
||||||
|
|
||||||
const auto& c0 = color_buffer.clear_word0;
|
const auto& c0 = color_buffer.clear_word0;
|
||||||
const auto& c1 = color_buffer.clear_word1;
|
const auto& c1 = color_buffer.clear_word1;
|
||||||
|
@ -34,6 +34,8 @@ bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor);
|
|||||||
|
|
||||||
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func);
|
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func);
|
||||||
|
|
||||||
|
vk::LogicOp LogicOp(Liverpool::ColorControl::LogicOp logic_op);
|
||||||
|
|
||||||
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);
|
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);
|
||||||
|
|
||||||
vk::CompareOp DepthCompare(AmdGpu::DepthCompare comp);
|
vk::CompareOp DepthCompare(AmdGpu::DepthCompare comp);
|
||||||
|
@ -72,12 +72,21 @@ GraphicsPipeline::GraphicsPipeline(
|
|||||||
|
|
||||||
VertexInputs<vk::VertexInputAttributeDescription> vertex_attributes;
|
VertexInputs<vk::VertexInputAttributeDescription> vertex_attributes;
|
||||||
VertexInputs<vk::VertexInputBindingDescription> vertex_bindings;
|
VertexInputs<vk::VertexInputBindingDescription> vertex_bindings;
|
||||||
|
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT> divisors;
|
||||||
VertexInputs<AmdGpu::Buffer> guest_buffers;
|
VertexInputs<AmdGpu::Buffer> guest_buffers;
|
||||||
if (!instance.IsVertexInputDynamicState()) {
|
if (!instance.IsVertexInputDynamicState()) {
|
||||||
GetVertexInputs(vertex_attributes, vertex_bindings, guest_buffers);
|
const auto& vs_info = runtime_infos[u32(Shader::LogicalStage::Vertex)].vs_info;
|
||||||
|
GetVertexInputs(vertex_attributes, vertex_bindings, divisors, guest_buffers,
|
||||||
|
vs_info.step_rate_0, vs_info.step_rate_1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const vk::PipelineVertexInputDivisorStateCreateInfo divisor_state = {
|
||||||
|
.vertexBindingDivisorCount = static_cast<u32>(divisors.size()),
|
||||||
|
.pVertexBindingDivisors = divisors.data(),
|
||||||
|
};
|
||||||
|
|
||||||
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
|
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
|
||||||
|
.pNext = divisors.empty() ? nullptr : &divisor_state,
|
||||||
.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
|
.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
|
||||||
.pVertexBindingDescriptions = vertex_bindings.data(),
|
.pVertexBindingDescriptions = vertex_bindings.data(),
|
||||||
.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
|
.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
|
||||||
@ -100,17 +109,36 @@ GraphicsPipeline::GraphicsPipeline(
|
|||||||
.patchControlPoints = is_rect_list ? 3U : (is_quad_list ? 4U : key.patch_control_points),
|
.patchControlPoints = is_rect_list ? 3U : (is_quad_list ? 4U : key.patch_control_points),
|
||||||
};
|
};
|
||||||
|
|
||||||
const vk::PipelineRasterizationStateCreateInfo raster_state = {
|
vk::StructureChain raster_chain = {
|
||||||
.depthClampEnable = false,
|
vk::PipelineRasterizationStateCreateInfo{
|
||||||
.rasterizerDiscardEnable = false,
|
.depthClampEnable = key.depth_clamp_enable ||
|
||||||
.polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode),
|
(!key.depth_clip_enable && !instance.IsDepthClipEnableSupported()),
|
||||||
.lineWidth = 1.0f,
|
.rasterizerDiscardEnable = false,
|
||||||
|
.polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode),
|
||||||
|
.lineWidth = 1.0f,
|
||||||
|
},
|
||||||
|
vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT{
|
||||||
|
.provokingVertexMode = key.provoking_vtx_last == Liverpool::ProvokingVtxLast::First
|
||||||
|
? vk::ProvokingVertexModeEXT::eFirstVertex
|
||||||
|
: vk::ProvokingVertexModeEXT::eLastVertex,
|
||||||
|
},
|
||||||
|
vk::PipelineRasterizationDepthClipStateCreateInfoEXT{
|
||||||
|
.depthClipEnable = key.depth_clip_enable,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (!instance.IsProvokingVertexSupported()) {
|
||||||
|
raster_chain.unlink<vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT>();
|
||||||
|
}
|
||||||
|
if (!instance.IsDepthClipEnableSupported()) {
|
||||||
|
raster_chain.unlink<vk::PipelineRasterizationDepthClipStateCreateInfoEXT>();
|
||||||
|
}
|
||||||
|
|
||||||
const vk::PipelineMultisampleStateCreateInfo multisampling = {
|
const vk::PipelineMultisampleStateCreateInfo multisampling = {
|
||||||
.rasterizationSamples =
|
.rasterizationSamples =
|
||||||
LiverpoolToVK::NumSamples(key.num_samples, instance.GetFramebufferSampleCounts()),
|
LiverpoolToVK::NumSamples(key.num_samples, instance.GetFramebufferSampleCounts()),
|
||||||
.sampleShadingEnable = false,
|
.sampleShadingEnable =
|
||||||
|
fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena,
|
||||||
};
|
};
|
||||||
|
|
||||||
const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = {
|
const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = {
|
||||||
@ -121,7 +149,7 @@ GraphicsPipeline::GraphicsPipeline(
|
|||||||
.pNext = instance.IsDepthClipControlSupported() ? &clip_control : nullptr,
|
.pNext = instance.IsDepthClipControlSupported() ? &clip_control : nullptr,
|
||||||
};
|
};
|
||||||
|
|
||||||
boost::container::static_vector<vk::DynamicState, 20> dynamic_states = {
|
boost::container::static_vector<vk::DynamicState, 32> dynamic_states = {
|
||||||
vk::DynamicState::eViewportWithCount, vk::DynamicState::eScissorWithCount,
|
vk::DynamicState::eViewportWithCount, vk::DynamicState::eScissorWithCount,
|
||||||
vk::DynamicState::eBlendConstants, vk::DynamicState::eDepthTestEnable,
|
vk::DynamicState::eBlendConstants, vk::DynamicState::eDepthTestEnable,
|
||||||
vk::DynamicState::eDepthWriteEnable, vk::DynamicState::eDepthCompareOp,
|
vk::DynamicState::eDepthWriteEnable, vk::DynamicState::eDepthCompareOp,
|
||||||
@ -129,7 +157,8 @@ GraphicsPipeline::GraphicsPipeline(
|
|||||||
vk::DynamicState::eStencilTestEnable, vk::DynamicState::eStencilReference,
|
vk::DynamicState::eStencilTestEnable, vk::DynamicState::eStencilReference,
|
||||||
vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask,
|
vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask,
|
||||||
vk::DynamicState::eStencilOp, vk::DynamicState::eCullMode,
|
vk::DynamicState::eStencilOp, vk::DynamicState::eCullMode,
|
||||||
vk::DynamicState::eFrontFace,
|
vk::DynamicState::eFrontFace, vk::DynamicState::eRasterizerDiscardEnable,
|
||||||
|
vk::DynamicState::eLineWidth,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (instance.IsPrimitiveRestartDisableSupported()) {
|
if (instance.IsPrimitiveRestartDisableSupported()) {
|
||||||
@ -212,11 +241,19 @@ GraphicsPipeline::GraphicsPipeline(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto depth_format =
|
||||||
|
instance.GetSupportedFormat(LiverpoolToVK::DepthFormat(key.z_format, key.stencil_format),
|
||||||
|
vk::FormatFeatureFlagBits2::eDepthStencilAttachment);
|
||||||
const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = {
|
const vk::PipelineRenderingCreateInfo pipeline_rendering_ci = {
|
||||||
.colorAttachmentCount = key.num_color_attachments,
|
.colorAttachmentCount = key.num_color_attachments,
|
||||||
.pColorAttachmentFormats = key.color_formats.data(),
|
.pColorAttachmentFormats = key.color_formats.data(),
|
||||||
.depthAttachmentFormat = key.depth_format,
|
.depthAttachmentFormat = key.z_format != Liverpool::DepthBuffer::ZFormat::Invalid
|
||||||
.stencilAttachmentFormat = key.stencil_format,
|
? depth_format
|
||||||
|
: vk::Format::eUndefined,
|
||||||
|
.stencilAttachmentFormat =
|
||||||
|
key.stencil_format != Liverpool::DepthBuffer::StencilFormat::Invalid
|
||||||
|
? depth_format
|
||||||
|
: vk::Format::eUndefined,
|
||||||
};
|
};
|
||||||
|
|
||||||
std::array<vk::PipelineColorBlendAttachmentState, Liverpool::NumColorBuffers> attachments;
|
std::array<vk::PipelineColorBlendAttachmentState, Liverpool::NumColorBuffers> attachments;
|
||||||
@ -271,8 +308,9 @@ GraphicsPipeline::GraphicsPipeline(
|
|||||||
}
|
}
|
||||||
|
|
||||||
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
const vk::PipelineColorBlendStateCreateInfo color_blending = {
|
||||||
.logicOpEnable = false,
|
.logicOpEnable =
|
||||||
.logicOp = vk::LogicOp::eCopy,
|
instance.IsLogicOpSupported() && key.logic_op != Liverpool::ColorControl::LogicOp::Copy,
|
||||||
|
.logicOp = LiverpoolToVK::LogicOp(key.logic_op),
|
||||||
.attachmentCount = key.num_color_attachments,
|
.attachmentCount = key.num_color_attachments,
|
||||||
.pAttachments = attachments.data(),
|
.pAttachments = attachments.data(),
|
||||||
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f},
|
.blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f},
|
||||||
@ -286,7 +324,7 @@ GraphicsPipeline::GraphicsPipeline(
|
|||||||
.pInputAssemblyState = &input_assembly,
|
.pInputAssemblyState = &input_assembly,
|
||||||
.pTessellationState = &tessellation_state,
|
.pTessellationState = &tessellation_state,
|
||||||
.pViewportState = &viewport_info,
|
.pViewportState = &viewport_info,
|
||||||
.pRasterizationState = &raster_state,
|
.pRasterizationState = &raster_chain.get(),
|
||||||
.pMultisampleState = &multisampling,
|
.pMultisampleState = &multisampling,
|
||||||
.pColorBlendState = &color_blending,
|
.pColorBlendState = &color_blending,
|
||||||
.pDynamicState = &dynamic_info,
|
.pDynamicState = &dynamic_info,
|
||||||
@ -304,19 +342,17 @@ GraphicsPipeline::GraphicsPipeline(
|
|||||||
GraphicsPipeline::~GraphicsPipeline() = default;
|
GraphicsPipeline::~GraphicsPipeline() = default;
|
||||||
|
|
||||||
template <typename Attribute, typename Binding>
|
template <typename Attribute, typename Binding>
|
||||||
void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
|
void GraphicsPipeline::GetVertexInputs(
|
||||||
VertexInputs<Binding>& bindings,
|
VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
|
||||||
VertexInputs<AmdGpu::Buffer>& guest_buffers) const {
|
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
|
||||||
|
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const {
|
||||||
|
using InstanceIdType = Shader::Gcn::VertexAttribute::InstanceIdType;
|
||||||
if (!fetch_shader || fetch_shader->attributes.empty()) {
|
if (!fetch_shader || fetch_shader->attributes.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
|
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
|
||||||
for (const auto& attrib : fetch_shader->attributes) {
|
for (const auto& attrib : fetch_shader->attributes) {
|
||||||
if (attrib.UsesStepRates()) {
|
const auto step_rate = attrib.GetStepRate();
|
||||||
// Skip attribute binding as the data will be pulled by shader.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto& buffer = attrib.GetSharp(vs_info);
|
const auto& buffer = attrib.GetSharp(vs_info);
|
||||||
attributes.push_back(Attribute{
|
attributes.push_back(Attribute{
|
||||||
.location = attrib.semantic,
|
.location = attrib.semantic,
|
||||||
@ -327,12 +363,19 @@ void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
|
|||||||
bindings.push_back(Binding{
|
bindings.push_back(Binding{
|
||||||
.binding = attrib.semantic,
|
.binding = attrib.semantic,
|
||||||
.stride = buffer.GetStride(),
|
.stride = buffer.GetStride(),
|
||||||
.inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
|
.inputRate = step_rate == InstanceIdType::None ? vk::VertexInputRate::eVertex
|
||||||
? vk::VertexInputRate::eVertex
|
: vk::VertexInputRate::eInstance,
|
||||||
: vk::VertexInputRate::eInstance,
|
|
||||||
});
|
});
|
||||||
|
const u32 divisor = step_rate == InstanceIdType::OverStepRate0
|
||||||
|
? step_rate_0
|
||||||
|
: (step_rate == InstanceIdType::OverStepRate1 ? step_rate_1 : 1);
|
||||||
if constexpr (std::is_same_v<Binding, vk::VertexInputBindingDescription2EXT>) {
|
if constexpr (std::is_same_v<Binding, vk::VertexInputBindingDescription2EXT>) {
|
||||||
bindings.back().divisor = 1;
|
bindings.back().divisor = divisor;
|
||||||
|
} else if (step_rate != InstanceIdType::None) {
|
||||||
|
divisors.push_back(vk::VertexInputBindingDivisorDescriptionEXT{
|
||||||
|
.binding = attrib.semantic,
|
||||||
|
.divisor = divisor,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
guest_buffers.emplace_back(buffer);
|
guest_buffers.emplace_back(buffer);
|
||||||
}
|
}
|
||||||
@ -342,11 +385,13 @@ void GraphicsPipeline::GetVertexInputs(VertexInputs<Attribute>& attributes,
|
|||||||
template void GraphicsPipeline::GetVertexInputs(
|
template void GraphicsPipeline::GetVertexInputs(
|
||||||
VertexInputs<vk::VertexInputAttributeDescription>& attributes,
|
VertexInputs<vk::VertexInputAttributeDescription>& attributes,
|
||||||
VertexInputs<vk::VertexInputBindingDescription>& bindings,
|
VertexInputs<vk::VertexInputBindingDescription>& bindings,
|
||||||
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
|
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
|
||||||
|
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const;
|
||||||
template void GraphicsPipeline::GetVertexInputs(
|
template void GraphicsPipeline::GetVertexInputs(
|
||||||
VertexInputs<vk::VertexInputAttributeDescription2EXT>& attributes,
|
VertexInputs<vk::VertexInputAttributeDescription2EXT>& attributes,
|
||||||
VertexInputs<vk::VertexInputBindingDescription2EXT>& bindings,
|
VertexInputs<vk::VertexInputBindingDescription2EXT>& bindings,
|
||||||
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
|
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
|
||||||
|
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0, u32 step_rate_1) const;
|
||||||
|
|
||||||
void GraphicsPipeline::BuildDescSetLayout() {
|
void GraphicsPipeline::BuildDescSetLayout() {
|
||||||
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
|
||||||
|
@ -33,22 +33,29 @@ using VertexInputs = boost::container::static_vector<T, MaxVertexBufferCount>;
|
|||||||
|
|
||||||
struct GraphicsPipelineKey {
|
struct GraphicsPipelineKey {
|
||||||
std::array<size_t, MaxShaderStages> stage_hashes;
|
std::array<size_t, MaxShaderStages> stage_hashes;
|
||||||
|
std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats;
|
||||||
|
u32 patch_control_points;
|
||||||
u32 num_color_attachments;
|
u32 num_color_attachments;
|
||||||
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
||||||
std::array<Shader::PsColorBuffer, Liverpool::NumColorBuffers> color_buffers;
|
std::array<Shader::PsColorBuffer, Liverpool::NumColorBuffers> color_buffers;
|
||||||
vk::Format depth_format;
|
|
||||||
vk::Format stencil_format;
|
|
||||||
|
|
||||||
u32 num_samples;
|
|
||||||
u32 mrt_mask;
|
|
||||||
AmdGpu::PrimitiveType prim_type;
|
|
||||||
Liverpool::PolygonMode polygon_mode;
|
|
||||||
Liverpool::ClipSpace clip_space;
|
|
||||||
Liverpool::ColorBufferMask cb_shader_mask;
|
|
||||||
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
|
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
|
||||||
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
|
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
|
||||||
std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats;
|
Liverpool::ColorBufferMask cb_shader_mask;
|
||||||
u32 patch_control_points;
|
Liverpool::ColorControl::LogicOp logic_op;
|
||||||
|
u32 num_samples;
|
||||||
|
u32 mrt_mask;
|
||||||
|
struct {
|
||||||
|
Liverpool::DepthBuffer::ZFormat z_format : 2;
|
||||||
|
Liverpool::DepthBuffer::StencilFormat stencil_format : 1;
|
||||||
|
u32 depth_clamp_enable : 1;
|
||||||
|
};
|
||||||
|
struct {
|
||||||
|
AmdGpu::PrimitiveType prim_type : 5;
|
||||||
|
Liverpool::PolygonMode polygon_mode : 2;
|
||||||
|
Liverpool::ClipSpace clip_space : 1;
|
||||||
|
Liverpool::ProvokingVtxLast provoking_vtx_last : 1;
|
||||||
|
u32 depth_clip_enable : 1;
|
||||||
|
};
|
||||||
|
|
||||||
bool operator==(const GraphicsPipelineKey& key) const noexcept {
|
bool operator==(const GraphicsPipelineKey& key) const noexcept {
|
||||||
return std::memcmp(this, &key, sizeof(key)) == 0;
|
return std::memcmp(this, &key, sizeof(key)) == 0;
|
||||||
@ -81,7 +88,9 @@ public:
|
|||||||
/// Gets the attributes and bindings for vertex inputs.
|
/// Gets the attributes and bindings for vertex inputs.
|
||||||
template <typename Attribute, typename Binding>
|
template <typename Attribute, typename Binding>
|
||||||
void GetVertexInputs(VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
|
void GetVertexInputs(VertexInputs<Attribute>& attributes, VertexInputs<Binding>& bindings,
|
||||||
VertexInputs<AmdGpu::Buffer>& guest_buffers) const;
|
VertexInputs<vk::VertexInputBindingDivisorDescriptionEXT>& divisors,
|
||||||
|
VertexInputs<AmdGpu::Buffer>& guest_buffers, u32 step_rate_0,
|
||||||
|
u32 step_rate_1) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void BuildDescSetLayout();
|
void BuildDescSetLayout();
|
||||||
|
@ -248,6 +248,7 @@ bool Instance::CreateDevice() {
|
|||||||
// Required
|
// Required
|
||||||
ASSERT(add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME));
|
ASSERT(add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME));
|
||||||
ASSERT(add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME));
|
ASSERT(add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME));
|
||||||
|
ASSERT(add_extension(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME));
|
||||||
|
|
||||||
// Optional
|
// Optional
|
||||||
depth_range_unrestricted = add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
|
depth_range_unrestricted = add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
|
||||||
@ -269,10 +270,17 @@ bool Instance::CreateDevice() {
|
|||||||
}
|
}
|
||||||
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
|
||||||
depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
|
depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
|
||||||
|
depth_clip_enable = add_extension(VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME);
|
||||||
vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
|
||||||
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
|
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
|
||||||
fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);
|
amd_shader_explicit_vertex_parameter =
|
||||||
|
add_extension(VK_AMD_SHADER_EXPLICIT_VERTEX_PARAMETER_EXTENSION_NAME);
|
||||||
|
if (!amd_shader_explicit_vertex_parameter) {
|
||||||
|
fragment_shader_barycentric =
|
||||||
|
add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);
|
||||||
|
}
|
||||||
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
|
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
|
||||||
|
provoking_vertex = add_extension(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
|
||||||
shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
|
shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
|
||||||
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
|
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
|
||||||
amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
|
amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
|
||||||
@ -361,9 +369,11 @@ bool Instance::CreateDevice() {
|
|||||||
.dualSrcBlend = features.dualSrcBlend,
|
.dualSrcBlend = features.dualSrcBlend,
|
||||||
.logicOp = features.logicOp,
|
.logicOp = features.logicOp,
|
||||||
.multiDrawIndirect = features.multiDrawIndirect,
|
.multiDrawIndirect = features.multiDrawIndirect,
|
||||||
|
.depthClamp = features.depthClamp,
|
||||||
.depthBiasClamp = features.depthBiasClamp,
|
.depthBiasClamp = features.depthBiasClamp,
|
||||||
.fillModeNonSolid = features.fillModeNonSolid,
|
.fillModeNonSolid = features.fillModeNonSolid,
|
||||||
.depthBounds = features.depthBounds,
|
.depthBounds = features.depthBounds,
|
||||||
|
.wideLines = features.wideLines,
|
||||||
.multiViewport = features.multiViewport,
|
.multiViewport = features.multiViewport,
|
||||||
.samplerAnisotropy = features.samplerAnisotropy,
|
.samplerAnisotropy = features.samplerAnisotropy,
|
||||||
.vertexPipelineStoresAndAtomics = features.vertexPipelineStoresAndAtomics,
|
.vertexPipelineStoresAndAtomics = features.vertexPipelineStoresAndAtomics,
|
||||||
@ -417,6 +427,9 @@ bool Instance::CreateDevice() {
|
|||||||
vk::PhysicalDeviceDepthClipControlFeaturesEXT{
|
vk::PhysicalDeviceDepthClipControlFeaturesEXT{
|
||||||
.depthClipControl = true,
|
.depthClipControl = true,
|
||||||
},
|
},
|
||||||
|
vk::PhysicalDeviceDepthClipEnableFeaturesEXT{
|
||||||
|
.depthClipEnable = true,
|
||||||
|
},
|
||||||
vk::PhysicalDeviceRobustness2FeaturesEXT{
|
vk::PhysicalDeviceRobustness2FeaturesEXT{
|
||||||
.robustBufferAccess2 = robustness2_features.robustBufferAccess2,
|
.robustBufferAccess2 = robustness2_features.robustBufferAccess2,
|
||||||
.robustImageAccess2 = robustness2_features.robustImageAccess2,
|
.robustImageAccess2 = robustness2_features.robustImageAccess2,
|
||||||
@ -436,6 +449,12 @@ bool Instance::CreateDevice() {
|
|||||||
vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
|
vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
|
||||||
.legacyVertexAttributes = true,
|
.legacyVertexAttributes = true,
|
||||||
},
|
},
|
||||||
|
vk::PhysicalDeviceProvokingVertexFeaturesEXT{
|
||||||
|
.provokingVertexLast = true,
|
||||||
|
},
|
||||||
|
vk::PhysicalDeviceVertexAttributeDivisorFeatures{
|
||||||
|
.vertexAttributeInstanceRateDivisor = true,
|
||||||
|
},
|
||||||
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
|
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT{
|
||||||
.shaderBufferFloat32AtomicMinMax =
|
.shaderBufferFloat32AtomicMinMax =
|
||||||
shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax,
|
shader_atomic_float2_features.shaderBufferFloat32AtomicMinMax,
|
||||||
@ -483,6 +502,9 @@ bool Instance::CreateDevice() {
|
|||||||
if (!depth_clip_control) {
|
if (!depth_clip_control) {
|
||||||
device_chain.unlink<vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
|
device_chain.unlink<vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
|
||||||
}
|
}
|
||||||
|
if (!depth_clip_enable) {
|
||||||
|
device_chain.unlink<vk::PhysicalDeviceDepthClipEnableFeaturesEXT>();
|
||||||
|
}
|
||||||
if (!robustness2) {
|
if (!robustness2) {
|
||||||
device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
|
device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
|
||||||
}
|
}
|
||||||
@ -498,6 +520,9 @@ bool Instance::CreateDevice() {
|
|||||||
if (!legacy_vertex_attributes) {
|
if (!legacy_vertex_attributes) {
|
||||||
device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
|
device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
|
||||||
}
|
}
|
||||||
|
if (!provoking_vertex) {
|
||||||
|
device_chain.unlink<vk::PhysicalDeviceProvokingVertexFeaturesEXT>();
|
||||||
|
}
|
||||||
if (!shader_atomic_float2) {
|
if (!shader_atomic_float2) {
|
||||||
device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
|
device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
|
||||||
}
|
}
|
||||||
|
@ -109,6 +109,11 @@ public:
|
|||||||
return depth_clip_control;
|
return depth_clip_control;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when VK_EXT_depth_clip_enable is supported
|
||||||
|
bool IsDepthClipEnableSupported() const {
|
||||||
|
return depth_clip_enable;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true when VK_EXT_depth_range_unrestricted is supported
|
/// Returns true when VK_EXT_depth_range_unrestricted is supported
|
||||||
bool IsDepthRangeUnrestrictedSupported() const {
|
bool IsDepthRangeUnrestrictedSupported() const {
|
||||||
return depth_range_unrestricted;
|
return depth_range_unrestricted;
|
||||||
@ -140,6 +145,11 @@ public:
|
|||||||
return fragment_shader_barycentric;
|
return fragment_shader_barycentric;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when VK_AMD_shader_explicit_vertex_parameter is supported.
|
||||||
|
bool IsAmdShaderExplicitVertexParameterSupported() const {
|
||||||
|
return amd_shader_explicit_vertex_parameter;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true when VK_EXT_primitive_topology_list_restart is supported.
|
/// Returns true when VK_EXT_primitive_topology_list_restart is supported.
|
||||||
bool IsListRestartSupported() const {
|
bool IsListRestartSupported() const {
|
||||||
return list_restart;
|
return list_restart;
|
||||||
@ -150,6 +160,11 @@ public:
|
|||||||
return legacy_vertex_attributes;
|
return legacy_vertex_attributes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when VK_EXT_provoking_vertex is supported.
|
||||||
|
bool IsProvokingVertexSupported() const {
|
||||||
|
return provoking_vertex;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true when VK_AMD_shader_image_load_store_lod is supported.
|
/// Returns true when VK_AMD_shader_image_load_store_lod is supported.
|
||||||
bool IsImageLoadStoreLodSupported() const {
|
bool IsImageLoadStoreLodSupported() const {
|
||||||
return image_load_store_lod;
|
return image_load_store_lod;
|
||||||
@ -351,6 +366,11 @@ public:
|
|||||||
return driver_id != vk::DriverId::eMoltenvk;
|
return driver_id != vk::DriverId::eMoltenvk;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if logic ops are supported by the device.
|
||||||
|
bool IsLogicOpSupported() const {
|
||||||
|
return features.logicOp;
|
||||||
|
}
|
||||||
|
|
||||||
/// Determines if a format is supported for a set of feature flags.
|
/// Determines if a format is supported for a set of feature flags.
|
||||||
[[nodiscard]] bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags2 flags) const;
|
[[nodiscard]] bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags2 flags) const;
|
||||||
|
|
||||||
@ -398,13 +418,16 @@ private:
|
|||||||
u32 queue_family_index{0};
|
u32 queue_family_index{0};
|
||||||
bool custom_border_color{};
|
bool custom_border_color{};
|
||||||
bool fragment_shader_barycentric{};
|
bool fragment_shader_barycentric{};
|
||||||
|
bool amd_shader_explicit_vertex_parameter{};
|
||||||
bool depth_clip_control{};
|
bool depth_clip_control{};
|
||||||
|
bool depth_clip_enable{};
|
||||||
bool depth_range_unrestricted{};
|
bool depth_range_unrestricted{};
|
||||||
bool dynamic_state_3{};
|
bool dynamic_state_3{};
|
||||||
bool vertex_input_dynamic_state{};
|
bool vertex_input_dynamic_state{};
|
||||||
bool robustness2{};
|
bool robustness2{};
|
||||||
bool list_restart{};
|
bool list_restart{};
|
||||||
bool legacy_vertex_attributes{};
|
bool legacy_vertex_attributes{};
|
||||||
|
bool provoking_vertex{};
|
||||||
bool shader_stencil_export{};
|
bool shader_stencil_export{};
|
||||||
bool image_load_store_lod{};
|
bool image_load_store_lod{};
|
||||||
bool amd_gcn_shader{};
|
bool amd_gcn_shader{};
|
||||||
|
@ -94,15 +94,10 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
|||||||
switch (stage) {
|
switch (stage) {
|
||||||
case Stage::Local: {
|
case Stage::Local: {
|
||||||
BuildCommon(regs.ls_program);
|
BuildCommon(regs.ls_program);
|
||||||
if (regs.stage_enable.IsStageEnabled(static_cast<u32>(Stage::Hull))) {
|
Shader::TessellationDataConstantBuffer tess_constants;
|
||||||
info.ls_info.links_with_tcs = true;
|
const auto* hull_info = infos[u32(Shader::LogicalStage::TessellationControl)];
|
||||||
Shader::TessellationDataConstantBuffer tess_constants;
|
hull_info->ReadTessConstantBuffer(tess_constants);
|
||||||
const auto* pgm = regs.ProgramForStage(static_cast<u32>(Stage::Hull));
|
info.ls_info.ls_stride = tess_constants.ls_stride;
|
||||||
const auto params = Liverpool::GetParams(*pgm);
|
|
||||||
const auto& hull_info = program_cache.at(params.hash)->info;
|
|
||||||
hull_info.ReadTessConstantBuffer(tess_constants);
|
|
||||||
info.ls_info.ls_stride = tess_constants.ls_stride;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Stage::Hull: {
|
case Stage::Hull: {
|
||||||
@ -122,6 +117,8 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
|||||||
case Stage::Vertex: {
|
case Stage::Vertex: {
|
||||||
BuildCommon(regs.vs_program);
|
BuildCommon(regs.vs_program);
|
||||||
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
||||||
|
info.vs_info.step_rate_0 = regs.vgt_instance_step_rate_0;
|
||||||
|
info.vs_info.step_rate_1 = regs.vgt_instance_step_rate_1;
|
||||||
info.vs_info.emulate_depth_negative_one_to_one =
|
info.vs_info.emulate_depth_negative_one_to_one =
|
||||||
!instance.IsDepthClipControlSupported() &&
|
!instance.IsDepthClipControlSupported() &&
|
||||||
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
|
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
|
||||||
@ -223,6 +220,12 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||||||
.supports_shared_int64_atomics = instance_.IsSharedInt64AtomicsSupported(),
|
.supports_shared_int64_atomics = instance_.IsSharedInt64AtomicsSupported(),
|
||||||
.supports_workgroup_explicit_memory_layout =
|
.supports_workgroup_explicit_memory_layout =
|
||||||
instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
|
instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
|
||||||
|
.supports_amd_shader_explicit_vertex_parameter =
|
||||||
|
instance_.IsAmdShaderExplicitVertexParameterSupported(),
|
||||||
|
.supports_fragment_shader_barycentric = instance_.IsFragmentShaderBarycentricSupported(),
|
||||||
|
.has_incomplete_fragment_shader_barycentric =
|
||||||
|
instance_.IsFragmentShaderBarycentricSupported() &&
|
||||||
|
instance.GetDriverID() == vk::DriverId::eMoltenvk,
|
||||||
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
|
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
|
||||||
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
|
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
|
||||||
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
|
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
|
||||||
@ -288,24 +291,18 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
auto& regs = liverpool->regs;
|
auto& regs = liverpool->regs;
|
||||||
auto& key = graphics_key;
|
auto& key = graphics_key;
|
||||||
|
|
||||||
const auto depth_format = instance.GetSupportedFormat(
|
key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format.Value()
|
||||||
LiverpoolToVK::DepthFormat(regs.depth_buffer.z_info.format,
|
: Liverpool::DepthBuffer::ZFormat::Invalid;
|
||||||
regs.depth_buffer.stencil_info.format),
|
key.stencil_format = regs.depth_buffer.StencilValid()
|
||||||
vk::FormatFeatureFlagBits2::eDepthStencilAttachment);
|
? regs.depth_buffer.stencil_info.format.Value()
|
||||||
if (regs.depth_buffer.DepthValid()) {
|
: Liverpool::DepthBuffer::StencilFormat::Invalid;
|
||||||
key.depth_format = depth_format;
|
key.depth_clamp_enable = !regs.depth_render_override.disable_viewport_clamp;
|
||||||
} else {
|
key.depth_clip_enable = regs.clipper_control.ZclipEnable();
|
||||||
key.depth_format = vk::Format::eUndefined;
|
key.clip_space = regs.clipper_control.clip_space;
|
||||||
}
|
key.provoking_vtx_last = regs.polygon_control.provoking_vtx_last;
|
||||||
if (regs.depth_buffer.StencilValid()) {
|
|
||||||
key.stencil_format = depth_format;
|
|
||||||
} else {
|
|
||||||
key.stencil_format = vk::Format::eUndefined;
|
|
||||||
}
|
|
||||||
|
|
||||||
key.prim_type = regs.primitive_type;
|
key.prim_type = regs.primitive_type;
|
||||||
key.polygon_mode = regs.polygon_control.PolyMode();
|
key.polygon_mode = regs.polygon_control.PolyMode();
|
||||||
key.clip_space = regs.clipper_control.clip_space;
|
key.logic_op = regs.color_control.rop3;
|
||||||
key.num_samples = regs.NumSamples();
|
key.num_samples = regs.NumSamples();
|
||||||
|
|
||||||
const bool skip_cb_binding =
|
const bool skip_cb_binding =
|
||||||
@ -460,10 +457,6 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
// Stride will still be handled outside the pipeline using dynamic state.
|
// Stride will still be handled outside the pipeline using dynamic state.
|
||||||
u32 vertex_binding = 0;
|
u32 vertex_binding = 0;
|
||||||
for (const auto& attrib : fetch_shader->attributes) {
|
for (const auto& attrib : fetch_shader->attributes) {
|
||||||
if (attrib.UsesStepRates()) {
|
|
||||||
// Skip attribute binding as the data will be pulled by shader.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const auto& buffer = attrib.GetSharp(*vs_info);
|
const auto& buffer = attrib.GetSharp(*vs_info);
|
||||||
ASSERT(vertex_binding < MaxVertexBufferCount);
|
ASSERT(vertex_binding < MaxVertexBufferCount);
|
||||||
key.vertex_buffer_formats[vertex_binding++] =
|
key.vertex_buffer_formats[vertex_binding++] =
|
||||||
@ -498,7 +491,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
} // namespace Vulkan
|
}
|
||||||
|
|
||||||
bool PipelineCache::RefreshComputeKey() {
|
bool PipelineCache::RefreshComputeKey() {
|
||||||
Shader::Backend::Bindings binding{};
|
Shader::Backend::Bindings binding{};
|
||||||
|
@ -20,12 +20,9 @@
|
|||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
|
static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
|
||||||
Shader::PushData push_data{};
|
|
||||||
push_data.step0 = regs.vgt_instance_step_rate_0;
|
|
||||||
push_data.step1 = regs.vgt_instance_step_rate_1;
|
|
||||||
|
|
||||||
// TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex
|
// TODO(roamic): Add support for multiple viewports and geometry shaders when ViewportIndex
|
||||||
// is encountered and implemented in the recompiler.
|
// is encountered and implemented in the recompiler.
|
||||||
|
Shader::PushData push_data{};
|
||||||
push_data.xoffset = regs.viewport_control.xoffset_enable ? regs.viewports[0].xoffset : 0.f;
|
push_data.xoffset = regs.viewport_control.xoffset_enable ? regs.viewports[0].xoffset : 0.f;
|
||||||
push_data.xscale = regs.viewport_control.xscale_enable ? regs.viewports[0].xscale : 1.f;
|
push_data.xscale = regs.viewport_control.xscale_enable ? regs.viewports[0].xscale : 1.f;
|
||||||
push_data.yoffset = regs.viewport_control.yoffset_enable ? regs.viewports[0].yoffset : 0.f;
|
push_data.yoffset = regs.viewport_control.yoffset_enable ? regs.viewports[0].yoffset : 0.f;
|
||||||
@ -506,9 +503,13 @@ bool Rasterizer::IsComputeMetaClear(const Pipeline* pipeline) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Most of the time when a metadata is updated with a shader it gets cleared. It means
|
||||||
|
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
|
||||||
|
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
|
||||||
|
// will need its full emulation anyways.
|
||||||
const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
|
const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
|
||||||
|
|
||||||
// Assume if a shader reads and writes metas at the same time, it is a copy shader.
|
// Assume if a shader reads metadata, it is a copy shader.
|
||||||
for (const auto& desc : info.buffers) {
|
for (const auto& desc : info.buffers) {
|
||||||
const VAddr address = desc.GetSharp(info).base_address;
|
const VAddr address = desc.GetSharp(info).base_address;
|
||||||
if (!desc.IsSpecial() && !desc.is_written && texture_cache.IsMeta(address)) {
|
if (!desc.IsSpecial() && !desc.is_written && texture_cache.IsMeta(address)) {
|
||||||
@ -516,10 +517,15 @@ bool Rasterizer::IsComputeMetaClear(const Pipeline* pipeline) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Most of the time when a metadata is updated with a shader it gets cleared. It means
|
// Metadata surfaces are tiled and thus need address calculation to be written properly.
|
||||||
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
|
// If a shader wants to encode HTILE, for example, from a depth image it will have to compute
|
||||||
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
|
// proper tile address from dispatch invocation id. This address calculation contains an xor
|
||||||
// will need its full emulation anyways.
|
// operation so use it as a heuristic for metadata writes that are probably not clears.
|
||||||
|
if (info.has_bitwise_xor) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assume if a shader writes metadata without address calculation, it is a clear shader.
|
||||||
for (const auto& desc : info.buffers) {
|
for (const auto& desc : info.buffers) {
|
||||||
const VAddr address = desc.GetSharp(info).base_address;
|
const VAddr address = desc.GetSharp(info).base_address;
|
||||||
if (!desc.IsSpecial() && desc.is_written && texture_cache.ClearMeta(address)) {
|
if (!desc.IsSpecial() && desc.is_written && texture_cache.ClearMeta(address)) {
|
||||||
@ -1017,9 +1023,10 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) const {
|
|||||||
UpdateViewportScissorState();
|
UpdateViewportScissorState();
|
||||||
UpdateDepthStencilState();
|
UpdateDepthStencilState();
|
||||||
UpdatePrimitiveState();
|
UpdatePrimitiveState();
|
||||||
|
UpdateRasterizationState();
|
||||||
|
|
||||||
auto& dynamic_state = scheduler.GetDynamicState();
|
auto& dynamic_state = scheduler.GetDynamicState();
|
||||||
dynamic_state.SetBlendConstants(&liverpool->regs.blend_constants.red);
|
dynamic_state.SetBlendConstants(liverpool->regs.blend_constants);
|
||||||
dynamic_state.SetColorWriteMasks(pipeline.GetWriteMasks());
|
dynamic_state.SetColorWriteMasks(pipeline.GetWriteMasks());
|
||||||
|
|
||||||
// Commit new dynamic state to the command buffer.
|
// Commit new dynamic state to the command buffer.
|
||||||
@ -1089,12 +1096,6 @@ void Rasterizer::UpdateViewportScissorState() const {
|
|||||||
viewport.maxDepth = zoffset + zscale;
|
viewport.maxDepth = zoffset + zscale;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!regs.depth_render_override.disable_viewport_clamp) {
|
|
||||||
// Apply depth clamp.
|
|
||||||
viewport.minDepth = std::max(viewport.minDepth, vp_d.zmin);
|
|
||||||
viewport.maxDepth = std::min(viewport.maxDepth, vp_d.zmax);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!instance.IsDepthRangeUnrestrictedSupported()) {
|
if (!instance.IsDepthRangeUnrestrictedSupported()) {
|
||||||
// Unrestricted depth range not supported by device. Restrict to valid range.
|
// Unrestricted depth range not supported by device. Restrict to valid range.
|
||||||
viewport.minDepth = std::max(viewport.minDepth, 0.f);
|
viewport.minDepth = std::max(viewport.minDepth, 0.f);
|
||||||
@ -1234,10 +1235,17 @@ void Rasterizer::UpdatePrimitiveState() const {
|
|||||||
const auto front_face = LiverpoolToVK::FrontFace(regs.polygon_control.front_face);
|
const auto front_face = LiverpoolToVK::FrontFace(regs.polygon_control.front_face);
|
||||||
|
|
||||||
dynamic_state.SetPrimitiveRestartEnabled(prim_restart);
|
dynamic_state.SetPrimitiveRestartEnabled(prim_restart);
|
||||||
|
dynamic_state.SetRasterizerDiscardEnabled(regs.clipper_control.dx_rasterization_kill);
|
||||||
dynamic_state.SetCullMode(cull_mode);
|
dynamic_state.SetCullMode(cull_mode);
|
||||||
dynamic_state.SetFrontFace(front_face);
|
dynamic_state.SetFrontFace(front_face);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Rasterizer::UpdateRasterizationState() const {
|
||||||
|
const auto& regs = liverpool->regs;
|
||||||
|
auto& dynamic_state = scheduler.GetDynamicState();
|
||||||
|
dynamic_state.SetLineWidth(regs.line_control.Width());
|
||||||
|
}
|
||||||
|
|
||||||
void Rasterizer::ScopeMarkerBegin(const std::string_view& str, bool from_guest) {
|
void Rasterizer::ScopeMarkerBegin(const std::string_view& str, bool from_guest) {
|
||||||
if ((from_guest && !Config::getVkGuestMarkersEnabled()) ||
|
if ((from_guest && !Config::getVkGuestMarkersEnabled()) ||
|
||||||
(!from_guest && !Config::getVkHostMarkersEnabled())) {
|
(!from_guest && !Config::getVkHostMarkersEnabled())) {
|
||||||
|
@ -94,6 +94,7 @@ private:
|
|||||||
void UpdateViewportScissorState() const;
|
void UpdateViewportScissorState() const;
|
||||||
void UpdateDepthStencilState() const;
|
void UpdateDepthStencilState() const;
|
||||||
void UpdatePrimitiveState() const;
|
void UpdatePrimitiveState() const;
|
||||||
|
void UpdateRasterizationState() const;
|
||||||
|
|
||||||
bool FilterDraw();
|
bool FilterDraw();
|
||||||
|
|
||||||
|
@ -308,6 +308,10 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd
|
|||||||
cmdbuf.setPrimitiveRestartEnable(primitive_restart_enable);
|
cmdbuf.setPrimitiveRestartEnable(primitive_restart_enable);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (dirty_state.rasterizer_discard_enable) {
|
||||||
|
dirty_state.rasterizer_discard_enable = false;
|
||||||
|
cmdbuf.setRasterizerDiscardEnable(rasterizer_discard_enable);
|
||||||
|
}
|
||||||
if (dirty_state.cull_mode) {
|
if (dirty_state.cull_mode) {
|
||||||
dirty_state.cull_mode = false;
|
dirty_state.cull_mode = false;
|
||||||
cmdbuf.setCullMode(cull_mode);
|
cmdbuf.setCullMode(cull_mode);
|
||||||
@ -318,7 +322,7 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd
|
|||||||
}
|
}
|
||||||
if (dirty_state.blend_constants) {
|
if (dirty_state.blend_constants) {
|
||||||
dirty_state.blend_constants = false;
|
dirty_state.blend_constants = false;
|
||||||
cmdbuf.setBlendConstants(blend_constants);
|
cmdbuf.setBlendConstants(blend_constants.data());
|
||||||
}
|
}
|
||||||
if (dirty_state.color_write_masks) {
|
if (dirty_state.color_write_masks) {
|
||||||
dirty_state.color_write_masks = false;
|
dirty_state.color_write_masks = false;
|
||||||
@ -326,6 +330,10 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd
|
|||||||
cmdbuf.setColorWriteMaskEXT(0, color_write_masks);
|
cmdbuf.setColorWriteMaskEXT(0, color_write_masks);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (dirty_state.line_width) {
|
||||||
|
dirty_state.line_width = false;
|
||||||
|
cmdbuf.setLineWidth(line_width);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
@ -96,11 +96,13 @@ struct DynamicState {
|
|||||||
bool stencil_back_compare_mask : 1;
|
bool stencil_back_compare_mask : 1;
|
||||||
|
|
||||||
bool primitive_restart_enable : 1;
|
bool primitive_restart_enable : 1;
|
||||||
|
bool rasterizer_discard_enable : 1;
|
||||||
bool cull_mode : 1;
|
bool cull_mode : 1;
|
||||||
bool front_face : 1;
|
bool front_face : 1;
|
||||||
|
|
||||||
bool blend_constants : 1;
|
bool blend_constants : 1;
|
||||||
bool color_write_masks : 1;
|
bool color_write_masks : 1;
|
||||||
|
bool line_width : 1;
|
||||||
} dirty_state{};
|
} dirty_state{};
|
||||||
|
|
||||||
Viewports viewports{};
|
Viewports viewports{};
|
||||||
@ -130,11 +132,13 @@ struct DynamicState {
|
|||||||
u32 stencil_back_compare_mask{};
|
u32 stencil_back_compare_mask{};
|
||||||
|
|
||||||
bool primitive_restart_enable{};
|
bool primitive_restart_enable{};
|
||||||
|
bool rasterizer_discard_enable{};
|
||||||
vk::CullModeFlags cull_mode{};
|
vk::CullModeFlags cull_mode{};
|
||||||
vk::FrontFace front_face{};
|
vk::FrontFace front_face{};
|
||||||
|
|
||||||
float blend_constants[4]{};
|
std::array<float, 4> blend_constants{};
|
||||||
ColorWriteMasks color_write_masks{};
|
ColorWriteMasks color_write_masks{};
|
||||||
|
float line_width{};
|
||||||
|
|
||||||
/// Commits the dynamic state to the provided command buffer.
|
/// Commits the dynamic state to the provided command buffer.
|
||||||
void Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf);
|
void Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf);
|
||||||
@ -283,19 +287,33 @@ struct DynamicState {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetBlendConstants(const float blend_constants_[4]) {
|
void SetBlendConstants(const std::array<float, 4> blend_constants_) {
|
||||||
if (!std::equal(blend_constants, std::end(blend_constants), blend_constants_)) {
|
if (blend_constants != blend_constants_) {
|
||||||
std::memcpy(blend_constants, blend_constants_, sizeof(blend_constants));
|
blend_constants = blend_constants_;
|
||||||
dirty_state.blend_constants = true;
|
dirty_state.blend_constants = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SetRasterizerDiscardEnabled(const bool enabled) {
|
||||||
|
if (rasterizer_discard_enable != enabled) {
|
||||||
|
rasterizer_discard_enable = enabled;
|
||||||
|
dirty_state.rasterizer_discard_enable = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void SetColorWriteMasks(const ColorWriteMasks& color_write_masks_) {
|
void SetColorWriteMasks(const ColorWriteMasks& color_write_masks_) {
|
||||||
if (!std::ranges::equal(color_write_masks, color_write_masks_)) {
|
if (!std::ranges::equal(color_write_masks, color_write_masks_)) {
|
||||||
color_write_masks = color_write_masks_;
|
color_write_masks = color_write_masks_;
|
||||||
dirty_state.color_write_masks = true;
|
dirty_state.color_write_masks = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SetLineWidth(const float width) {
|
||||||
|
if (line_width != width) {
|
||||||
|
line_width = width;
|
||||||
|
dirty_state.line_width = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class Scheduler {
|
class Scheduler {
|
||||||
|
@ -27,10 +27,9 @@ enum ImageFlagBits : u32 {
|
|||||||
CpuDirty = 1 << 1, ///< Contents have been modified from the CPU
|
CpuDirty = 1 << 1, ///< Contents have been modified from the CPU
|
||||||
GpuDirty = 1 << 2, ///< Contents have been modified from the GPU (valid data in buffer cache)
|
GpuDirty = 1 << 2, ///< Contents have been modified from the GPU (valid data in buffer cache)
|
||||||
Dirty = MaybeCpuDirty | CpuDirty | GpuDirty,
|
Dirty = MaybeCpuDirty | CpuDirty | GpuDirty,
|
||||||
GpuModified = 1 << 3, ///< Contents have been modified from the GPU
|
GpuModified = 1 << 3, ///< Contents have been modified from the GPU
|
||||||
Registered = 1 << 6, ///< True when the image is registered
|
Registered = 1 << 6, ///< True when the image is registered
|
||||||
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
|
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
|
||||||
MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered
|
|
||||||
};
|
};
|
||||||
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
||||||
|
|
||||||
|
@ -508,20 +508,16 @@ ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) {
|
|||||||
UpdateImage(image_id);
|
UpdateImage(image_id);
|
||||||
|
|
||||||
// Register meta data for this color buffer
|
// Register meta data for this color buffer
|
||||||
if (!(image.flags & ImageFlagBits::MetaRegistered)) {
|
if (desc.info.meta_info.cmask_addr) {
|
||||||
if (desc.info.meta_info.cmask_addr) {
|
surface_metas.emplace(desc.info.meta_info.cmask_addr,
|
||||||
surface_metas.emplace(desc.info.meta_info.cmask_addr,
|
MetaDataInfo{.type = MetaDataInfo::Type::CMask});
|
||||||
MetaDataInfo{.type = MetaDataInfo::Type::CMask});
|
image.info.meta_info.cmask_addr = desc.info.meta_info.cmask_addr;
|
||||||
image.info.meta_info.cmask_addr = desc.info.meta_info.cmask_addr;
|
}
|
||||||
image.flags |= ImageFlagBits::MetaRegistered;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (desc.info.meta_info.fmask_addr) {
|
if (desc.info.meta_info.fmask_addr) {
|
||||||
surface_metas.emplace(desc.info.meta_info.fmask_addr,
|
surface_metas.emplace(desc.info.meta_info.fmask_addr,
|
||||||
MetaDataInfo{.type = MetaDataInfo::Type::FMask});
|
MetaDataInfo{.type = MetaDataInfo::Type::FMask});
|
||||||
image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr;
|
image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr;
|
||||||
image.flags |= ImageFlagBits::MetaRegistered;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return RegisterImageView(image_id, desc.view_info);
|
return RegisterImageView(image_id, desc.view_info);
|
||||||
@ -536,15 +532,11 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
|
|||||||
UpdateImage(image_id);
|
UpdateImage(image_id);
|
||||||
|
|
||||||
// Register meta data for this depth buffer
|
// Register meta data for this depth buffer
|
||||||
if (!(image.flags & ImageFlagBits::MetaRegistered)) {
|
if (desc.info.meta_info.htile_addr) {
|
||||||
if (desc.info.meta_info.htile_addr) {
|
surface_metas.emplace(desc.info.meta_info.htile_addr,
|
||||||
surface_metas.emplace(
|
MetaDataInfo{.type = MetaDataInfo::Type::HTile,
|
||||||
desc.info.meta_info.htile_addr,
|
.clear_mask = image.info.meta_info.htile_clear_mask});
|
||||||
MetaDataInfo{.type = MetaDataInfo::Type::HTile,
|
image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr;
|
||||||
.clear_mask = image.info.meta_info.htile_clear_mask});
|
|
||||||
image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr;
|
|
||||||
image.flags |= ImageFlagBits::MetaRegistered;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If there is a stencil attachment, link depth and stencil.
|
// If there is a stencil attachment, link depth and stencil.
|
||||||
|
@ -161,10 +161,12 @@ public:
|
|||||||
/// Registers an image view for provided image
|
/// Registers an image view for provided image
|
||||||
ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info);
|
ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info);
|
||||||
|
|
||||||
|
/// Returns true if the specified address is a metadata surface.
|
||||||
bool IsMeta(VAddr address) const {
|
bool IsMeta(VAddr address) const {
|
||||||
return surface_metas.contains(address);
|
return surface_metas.contains(address);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if a slice of the specified metadata surface has been cleared.
|
||||||
bool IsMetaCleared(VAddr address, u32 slice) const {
|
bool IsMetaCleared(VAddr address, u32 slice) const {
|
||||||
const auto& it = surface_metas.find(address);
|
const auto& it = surface_metas.find(address);
|
||||||
if (it != surface_metas.end()) {
|
if (it != surface_metas.end()) {
|
||||||
@ -173,6 +175,7 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Clears all slices of the specified metadata surface.
|
||||||
bool ClearMeta(VAddr address) {
|
bool ClearMeta(VAddr address) {
|
||||||
auto it = surface_metas.find(address);
|
auto it = surface_metas.find(address);
|
||||||
if (it != surface_metas.end()) {
|
if (it != surface_metas.end()) {
|
||||||
@ -182,6 +185,7 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Updates the state of a slice of the specified metadata surface.
|
||||||
bool TouchMeta(VAddr address, u32 slice, bool is_clear) {
|
bool TouchMeta(VAddr address, u32 slice, bool is_clear) {
|
||||||
auto it = surface_metas.find(address);
|
auto it = surface_metas.find(address);
|
||||||
if (it != surface_metas.end()) {
|
if (it != surface_metas.end()) {
|
||||||
|
Loading…
Reference in New Issue
Block a user