Merge branch 'main' into ime-fixes

This commit is contained in:
Valdis Bogdāns 2025-07-16 10:52:16 +03:00 committed by GitHub
commit 2904b66c43
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
32 changed files with 390 additions and 373 deletions

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit b4eccb336f1b1169af48dac1e04015985af86e3e Subproject commit 282083a595dcca86814dedab2f2b0363ef38f1ec

View File

@ -526,7 +526,14 @@ s32 PS4_SYSV_ABI sceAudio3dStrError() {
} }
/// Shuts down the Audio3d library state.
///
/// Returns ORBIS_AUDIO3D_ERROR_NOT_READY when no state object exists, otherwise
/// issues a final output call with a null buffer, closes the AudioOut handle,
/// destroys the state object and returns ORBIS_OK.
s32 PS4_SYSV_ABI sceAudio3dTerminate() {
    LOG_INFO(Lib_Audio3d, "called");
    if (!state) {
        return ORBIS_AUDIO3D_ERROR_NOT_READY;
    }

    // NOTE(review): the null-buffer output call presumably drains pending audio
    // before the port is closed -- confirm against the AudioOut implementation.
    AudioOut::sceAudioOutOutput(state->audio_out_handle, nullptr);
    AudioOut::sceAudioOutClose(state->audio_out_handle);

    // Assumes `state` is a std::unique_ptr (it is used with operator bool, -> and
    // release()). release() only gives up ownership and returns the raw pointer,
    // so the state object would never be destroyed; reset() deletes it and leaves
    // `state` empty, which is what the `!state` check above relies on.
    state.reset();
    return ORBIS_OK;
}

View File

@ -537,6 +537,7 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma
vma_base_size - start_in_vma < size ? vma_base_size - start_in_vma : size; vma_base_size - start_in_vma < size ? vma_base_size - start_in_vma : size;
const bool has_backing = type == VMAType::Direct || type == VMAType::File; const bool has_backing = type == VMAType::Direct || type == VMAType::File;
const auto prot = vma_base.prot; const auto prot = vma_base.prot;
const bool readonly_file = prot == MemoryProt::CpuRead && type == VMAType::File;
if (type == VMAType::Free) { if (type == VMAType::Free) {
return adjusted_size; return adjusted_size;
@ -554,9 +555,8 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma
vma.phys_base = 0; vma.phys_base = 0;
vma.disallow_merge = false; vma.disallow_merge = false;
vma.name = ""; vma.name = "";
const auto post_merge_it = MergeAdjacent(vma_map, new_it); MergeAdjacent(vma_map, new_it);
auto& post_merge_vma = post_merge_it->second;
bool readonly_file = post_merge_vma.prot == MemoryProt::CpuRead && type == VMAType::File;
if (type != VMAType::Reserved && type != VMAType::PoolReserved) { if (type != VMAType::Reserved && type != VMAType::PoolReserved) {
// If this mapping has GPU access, unmap from GPU. // If this mapping has GPU access, unmap from GPU.
if (IsValidGpuMapping(virtual_addr, size)) { if (IsValidGpuMapping(virtual_addr, size)) {

View File

@ -437,7 +437,7 @@ void SettingsDialog::LoadValuesFromConfig() {
toml::find_or<int>(data, "Settings", "consoleLanguage", 6))) % toml::find_or<int>(data, "Settings", "consoleLanguage", 6))) %
languageIndexes.size()); languageIndexes.size());
ui->emulatorLanguageComboBox->setCurrentIndex( ui->emulatorLanguageComboBox->setCurrentIndex(
languages[toml::find_or<std::string>(data, "GUI", "emulatorLanguage", "en_US")]); languages[m_gui_settings->GetValue(gui::gen_guiLanguage).toString().toStdString()]);
ui->hideCursorComboBox->setCurrentIndex(toml::find_or<int>(data, "Input", "cursorState", 1)); ui->hideCursorComboBox->setCurrentIndex(toml::find_or<int>(data, "Input", "cursorState", 1));
OnCursorStateChanged(toml::find_or<int>(data, "Input", "cursorState", 1)); OnCursorStateChanged(toml::find_or<int>(data, "Input", "cursorState", 1));
ui->idleTimeoutSpinBox->setValue(toml::find_or<int>(data, "Input", "cursorHideTimeout", 5)); ui->idleTimeoutSpinBox->setValue(toml::find_or<int>(data, "Input", "cursorHideTimeout", 5));

View File

@ -293,9 +293,17 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
if (stage == LogicalStage::Geometry) { if (stage == LogicalStage::Geometry) {
ctx.AddCapability(spv::Capability::Geometry); ctx.AddCapability(spv::Capability::Geometry);
} }
if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) { if (info.stage == Stage::Fragment) {
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); if (profile.supports_amd_shader_explicit_vertex_parameter) {
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); ctx.AddExtension("SPV_AMD_shader_explicit_vertex_parameter");
} else if (profile.supports_fragment_shader_barycentric) {
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
}
if (info.loads.GetAny(IR::Attribute::BaryCoordSmoothSample) ||
info.loads.GetAny(IR::Attribute::BaryCoordNoPerspSample)) {
ctx.AddCapability(spv::Capability::SampleRateShading);
}
} }
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) { if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
ctx.AddCapability(spv::Capability::Tessellation); ctx.AddCapability(spv::Capability::Tessellation);

View File

@ -45,7 +45,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num); return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
} }
default: default:
UNREACHABLE(); UNREACHABLE_MSG("Vertex output {}", u32(output));
} }
} }
@ -88,7 +88,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
case IR::Attribute::Depth: case IR::Attribute::Depth:
return ctx.frag_depth; return ctx.frag_depth;
default: default:
throw NotImplementedException("Write attribute {}", attr); UNREACHABLE_MSG("Write attribute {}", attr);
} }
} }
@ -111,7 +111,7 @@ std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr
case IR::Attribute::Depth: case IR::Attribute::Depth:
return {ctx.F32[1], false}; return {ctx.F32[1], false};
default: default:
throw NotImplementedException("Write attribute {}", attr); UNREACHABLE_MSG("Write attribute {}", attr);
} }
} }
} // Anonymous namespace } // Anonymous namespace
@ -159,81 +159,61 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
return result; return result;
} }
static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
if (IR::IsPosition(attr)) {
ASSERT(attr == IR::Attribute::Position0);
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{
ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
if (IR::IsParam(attr)) {
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
UNREACHABLE();
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
if (ctx.info.l_stage == LogicalStage::Geometry) {
return EmitGetAttributeForGeometry(ctx, attr, comp, index);
} else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
ctx.info.l_stage == LogicalStage::TessellationEval) {
if (IR::IsTessCoord(attr)) {
const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
const auto pointer{
ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
return ctx.OpLoad(ctx.F32[1], pointer);
}
UNREACHABLE();
}
if (IR::IsParam(attr)) { if (IR::IsParam(attr)) {
const u32 param_index{u32(attr) - u32(IR::Attribute::Param0)}; const u32 param_index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(param_index)}; const auto& param{ctx.input_params.at(param_index)};
Id result; const Id value = [&] {
if (param.is_loaded) { if (param.is_array) {
// Attribute is either default or manually interpolated. The id points to an already ASSERT(param.num_components > 1);
// loaded vector. if (param.is_loaded) {
result = ctx.OpCompositeExtract(param.component_type, param.id, comp); return ctx.OpCompositeExtract(param.component_type, param.id_array[index],
} else if (param.num_components > 1) { comp);
// Attribute is a vector and we need to access a specific component. } else {
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; return ctx.OpLoad(param.component_type,
result = ctx.OpLoad(param.component_type, pointer); ctx.OpAccessChain(param.pointer_type, param.id,
} else { ctx.ConstU32(index), ctx.ConstU32(comp)));
// Attribute is a single float or interger, simply load it. }
result = ctx.OpLoad(param.component_type, param.id); } else {
} ASSERT(!param.is_loaded);
if (param.is_integer) { if (param.num_components > 1) {
result = ctx.OpBitcast(ctx.F32[1], result); return ctx.OpLoad(
} param.component_type,
return result; ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp)));
} else {
return ctx.OpLoad(param.component_type, param.id);
}
}
}();
return param.is_integer ? ctx.OpBitcast(ctx.F32[1], value) : value;
}
if (IR::IsBarycentricCoord(attr) && ctx.profile.supports_fragment_shader_barycentric) {
++comp;
} }
switch (attr) { switch (attr) {
case IR::Attribute::FragCoord: { case IR::Attribute::Position0:
const Id coord = ctx.OpLoad( ASSERT(ctx.l_stage == LogicalStage::Geometry);
ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp))); return ctx.OpLoad(ctx.F32[1],
if (comp == 3) { ctx.OpAccessChain(ctx.input_f32, ctx.gl_in, ctx.ConstU32(index),
return ctx.OpFDiv(ctx.F32[1], ctx.ConstF32(1.f), coord); ctx.ConstU32(0U), ctx.ConstU32(comp)));
} case IR::Attribute::FragCoord:
return coord; return ctx.OpLoad(ctx.F32[1],
} ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp)));
case IR::Attribute::TessellationEvaluationPointU: case IR::Attribute::TessellationEvaluationPointU:
return ctx.OpLoad(ctx.F32[1], return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
case IR::Attribute::TessellationEvaluationPointV: case IR::Attribute::TessellationEvaluationPointV:
return ctx.OpLoad(ctx.F32[1], return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U))); ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
case IR::Attribute::BaryCoordSmooth:
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.bary_coord_smooth,
ctx.ConstU32(comp)));
case IR::Attribute::BaryCoordSmoothSample:
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.bary_coord_smooth_sample,
ctx.ConstU32(comp)));
case IR::Attribute::BaryCoordNoPersp:
return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.bary_coord_nopersp,
ctx.ConstU32(comp)));
default: default:
UNREACHABLE_MSG("Read attribute {}", attr); UNREACHABLE_MSG("Read attribute {}", attr);
} }

View File

@ -9,7 +9,7 @@ namespace Shader::Backend::SPIRV {
void EmitPrologue(EmitContext& ctx) { void EmitPrologue(EmitContext& ctx) {
if (ctx.stage == Stage::Fragment) { if (ctx.stage == Stage::Fragment) {
ctx.DefineInterpolatedAttribs(); ctx.DefineAmdPerVertexAttribs();
} }
if (ctx.info.loads.Get(IR::Attribute::WorkgroupIndex)) { if (ctx.info.loads.Get(IR::Attribute::WorkgroupIndex)) {
ctx.DefineWorkgroupIndex(); ctx.DefineWorkgroupIndex();

View File

@ -196,14 +196,15 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
} }
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
u32 num_components, bool output) { u32 num_components, bool output,
bool loaded, bool array) {
switch (GetNumberClass(fmt)) { switch (GetNumberClass(fmt)) {
case AmdGpu::NumberClass::Float: case AmdGpu::NumberClass::Float:
return {id, output ? output_f32 : input_f32, F32[1], num_components, false}; return {id, output ? output_f32 : input_f32, F32[1], num_components, false, loaded, array};
case AmdGpu::NumberClass::Uint: case AmdGpu::NumberClass::Uint:
return {id, output ? output_u32 : input_u32, U32[1], num_components, true}; return {id, output ? output_u32 : input_u32, U32[1], num_components, true, loaded, array};
case AmdGpu::NumberClass::Sint: case AmdGpu::NumberClass::Sint:
return {id, output ? output_s32 : input_s32, S32[1], num_components, true}; return {id, output ? output_s32 : input_s32, S32[1], num_components, true, loaded, array};
default: default:
break; break;
} }
@ -298,33 +299,24 @@ void EmitContext::DefineBufferProperties() {
} }
} }
void EmitContext::DefineInterpolatedAttribs() { void EmitContext::DefineAmdPerVertexAttribs() {
if (!profile.needs_manual_interpolation) { if (!profile.supports_amd_shader_explicit_vertex_parameter) {
return; return;
} }
// Iterate all input attributes, load them and manually interpolate.
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
const auto& input = runtime_info.fs_info.inputs[i]; const auto& input = runtime_info.fs_info.inputs[i];
auto& params = input_params[i]; if (input.IsDefault() || info.fs_interpolation[i].primary != Qualifier::PerVertex) {
if (input.is_flat || params.is_loaded) {
continue; continue;
} }
const Id p_array{OpLoad(TypeArray(F32[4], ConstU32(3U)), params.id)}; auto& param = input_params[i];
const Id p0{OpCompositeExtract(F32[4], p_array, 0U)}; const Id pointer = param.id;
const Id p1{OpCompositeExtract(F32[4], p_array, 1U)}; param.id_array[0] =
const Id p2{OpCompositeExtract(F32[4], p_array, 2U)}; OpInterpolateAtVertexAMD(F32[param.num_components], pointer, ConstU32(0U));
const Id p10{OpFSub(F32[4], p1, p0)}; param.id_array[1] =
const Id p20{OpFSub(F32[4], p2, p0)}; OpInterpolateAtVertexAMD(F32[param.num_components], pointer, ConstU32(1U));
const Id bary_coord{OpLoad(F32[3], IsLinear(info.interp_qualifiers[i]) param.id_array[2] =
? bary_coord_linear_id OpInterpolateAtVertexAMD(F32[param.num_components], pointer, ConstU32(2U));
: bary_coord_persp_id)}; param.is_loaded = true;
const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)};
const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)};
const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)};
const Id p20_z{OpVectorTimesScalar(F32[4], p20, bary_coord_z)};
params.id = OpFAdd(F32[4], p0, OpFAdd(F32[4], p10_y, p20_z));
Name(params.id, fmt::format("fs_in_attr{}", i));
params.is_loaded = true;
} }
} }
@ -342,21 +334,6 @@ void EmitContext::DefineWorkgroupIndex() {
Name(workgroup_index_id, "workgroup_index"); Name(workgroup_index_id, "workgroup_index");
} }
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
switch (default_value) {
case 0:
return ctx.ConstF32(0.f, 0.f, 0.f, 0.f);
case 1:
return ctx.ConstF32(0.f, 0.f, 0.f, 1.f);
case 2:
return ctx.ConstF32(1.f, 1.f, 1.f, 0.f);
case 3:
return ctx.ConstF32(1.f, 1.f, 1.f, 1.f);
default:
UNREACHABLE();
}
}
void EmitContext::DefineInputs() { void EmitContext::DefineInputs() {
if (info.uses_lane_id) { if (info.uses_lane_id) {
subgroup_local_invocation_id = DefineVariable( subgroup_local_invocation_id = DefineVariable(
@ -398,49 +375,71 @@ void EmitContext::DefineInputs() {
front_facing = front_facing =
DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
} }
if (profile.needs_manual_interpolation) { if (info.loads.GetAny(IR::Attribute::BaryCoordSmooth)) {
if (info.has_perspective_interp) { if (profile.supports_amd_shader_explicit_vertex_parameter) {
bary_coord_persp_id = bary_coord_smooth = DefineVariable(F32[2], spv::BuiltIn::BaryCoordSmoothAMD,
spv::StorageClass::Input);
} else if (profile.supports_fragment_shader_barycentric) {
bary_coord_smooth =
DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input); DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
} else {
bary_coord_smooth = ConstF32(0.f, 0.f);
} }
if (info.has_linear_interp) { }
bary_coord_linear_id = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR, if (info.loads.GetAny(IR::Attribute::BaryCoordSmoothSample)) {
spv::StorageClass::Input); if (profile.supports_amd_shader_explicit_vertex_parameter) {
bary_coord_smooth_sample = DefineVariable(
F32[2], spv::BuiltIn::BaryCoordSmoothSampleAMD, spv::StorageClass::Input);
} else if (profile.supports_fragment_shader_barycentric) {
bary_coord_smooth_sample =
DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
// Decorate(bary_coord_smooth_sample, spv::Decoration::Sample);
} else {
bary_coord_smooth_sample = ConstF32(0.f, 0.f);
}
}
if (info.loads.GetAny(IR::Attribute::BaryCoordNoPersp)) {
if (profile.supports_amd_shader_explicit_vertex_parameter) {
bary_coord_nopersp = DefineVariable(F32[2], spv::BuiltIn::BaryCoordNoPerspAMD,
spv::StorageClass::Input);
} else if (profile.supports_fragment_shader_barycentric) {
bary_coord_nopersp = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR,
spv::StorageClass::Input);
} else {
bary_coord_nopersp = ConstF32(0.f, 0.f);
} }
} }
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
const auto& input = runtime_info.fs_info.inputs[i]; const auto& input = runtime_info.fs_info.inputs[i];
if (input.IsDefault()) { if (input.IsDefault()) {
input_params[i] = {
.id = MakeDefaultValue(*this, input.default_value),
.pointer_type = input_f32,
.component_type = F32[1],
.num_components = 4,
.is_integer = false,
.is_loaded = true,
};
continue; continue;
} }
const IR::Attribute param{IR::Attribute::Param0 + i}; const IR::Attribute param = IR::Attribute::Param0 + i;
const u32 num_components = info.loads.NumComponents(param); const u32 num_components = info.loads.NumComponents(param);
const Id type{F32[num_components]}; const auto [primary, auxiliary] = info.fs_interpolation[i];
Id attr_id{}; const Id type = F32[num_components];
if (profile.needs_manual_interpolation && !input.is_flat) { const Id attr_id = [&] {
attr_id = DefineInput(TypeArray(type, ConstU32(3U)), input.param_index); if (primary == Qualifier::PerVertex &&
Decorate(attr_id, spv::Decoration::PerVertexKHR); profile.supports_fragment_shader_barycentric) {
Name(attr_id, fmt::format("fs_in_attr{}_p", i)); return Name(DefineInput(TypeArray(type, ConstU32(3U)), input.param_index),
} else { fmt::format("fs_in_attr{}_p", i));
attr_id = DefineInput(type, input.param_index);
Name(attr_id, fmt::format("fs_in_attr{}", i));
if (input.is_flat) {
Decorate(attr_id, spv::Decoration::Flat);
} else if (IsLinear(info.interp_qualifiers[i])) {
Decorate(attr_id, spv::Decoration::NoPerspective);
} }
return Name(DefineInput(type, input.param_index), fmt::format("fs_in_attr{}", i));
}();
if (primary == Qualifier::PerVertex) {
Decorate(attr_id, profile.supports_amd_shader_explicit_vertex_parameter
? spv::Decoration::ExplicitInterpAMD
: spv::Decoration::PerVertexKHR);
} else if (primary != Qualifier::Smooth) {
Decorate(attr_id, primary == Qualifier::Flat ? spv::Decoration::Flat
: spv::Decoration::NoPerspective);
} }
input_params[i] = if (auxiliary != Qualifier::None) {
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false); Decorate(attr_id, auxiliary == Qualifier::Centroid ? spv::Decoration::Centroid
: spv::Decoration::Sample);
}
input_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components,
false, false, primary == Qualifier::PerVertex);
} }
break; break;
case LogicalStage::Compute: case LogicalStage::Compute:
@ -461,17 +460,16 @@ void EmitContext::DefineInputs() {
case LogicalStage::Geometry: { case LogicalStage::Geometry: {
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
const auto gl_per_vertex = const auto gl_per_vertex =
Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))), Name(TypeStruct(F32[4], F32[1], TypeArray(F32[1], ConstU32(1u))), "gl_PerVertex");
"gl_PerVertex");
MemberName(gl_per_vertex, 0, "gl_Position"); MemberName(gl_per_vertex, 0, "gl_Position");
MemberName(gl_per_vertex, 1, "gl_PointSize"); MemberName(gl_per_vertex, 1, "gl_PointSize");
MemberName(gl_per_vertex, 2, "gl_ClipDistance"); MemberName(gl_per_vertex, 2, "gl_ClipDistance");
MemberDecorate(gl_per_vertex, 0, spv::Decoration::BuiltIn, MemberDecorate(gl_per_vertex, 0, spv::Decoration::BuiltIn,
static_cast<std::uint32_t>(spv::BuiltIn::Position)); static_cast<u32>(spv::BuiltIn::Position));
MemberDecorate(gl_per_vertex, 1, spv::Decoration::BuiltIn, MemberDecorate(gl_per_vertex, 1, spv::Decoration::BuiltIn,
static_cast<std::uint32_t>(spv::BuiltIn::PointSize)); static_cast<u32>(spv::BuiltIn::PointSize));
MemberDecorate(gl_per_vertex, 2, spv::Decoration::BuiltIn, MemberDecorate(gl_per_vertex, 2, spv::Decoration::BuiltIn,
static_cast<std::uint32_t>(spv::BuiltIn::ClipDistance)); static_cast<u32>(spv::BuiltIn::ClipDistance));
Decorate(gl_per_vertex, spv::Decoration::Block); Decorate(gl_per_vertex, spv::Decoration::Block);
const auto num_verts_in = NumVertices(runtime_info.gs_info.in_primitive); const auto num_verts_in = NumVertices(runtime_info.gs_info.in_primitive);
const auto vertices_in = TypeArray(gl_per_vertex, ConstU32(num_verts_in)); const auto vertices_in = TypeArray(gl_per_vertex, ConstU32(num_verts_in));
@ -483,7 +481,8 @@ void EmitContext::DefineInputs() {
const Id type{TypeArray(F32[4], ConstU32(num_verts_in))}; const Id type{TypeArray(F32[4], ConstU32(num_verts_in))};
const Id id{DefineInput(type, param_id)}; const Id id{DefineInput(type, param_id)};
Name(id, fmt::format("gs_in_attr{}", param_id)); Name(id, fmt::format("gs_in_attr{}", param_id));
input_params[param_id] = {id, input_f32, F32[1], 4}; input_params[param_id] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, 4, false, false, true);
} }
break; break;
} }
@ -665,7 +664,7 @@ void EmitContext::DefineOutputs() {
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) { for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
const Id id{DefineOutput(F32[4], attr_id)}; const Id id{DefineOutput(F32[4], attr_id)};
Name(id, fmt::format("out_attr{}", attr_id)); Name(id, fmt::format("out_attr{}", attr_id));
output_params[attr_id] = {id, output_f32, F32[1], 4u}; output_params[attr_id] = GetAttributeInfo(AmdGpu::NumberFormat::Float, id, 4, true);
} }
break; break;
} }

View File

@ -45,7 +45,7 @@ public:
Id Def(const IR::Value& value); Id Def(const IR::Value& value);
void DefineBufferProperties(); void DefineBufferProperties();
void DefineInterpolatedAttribs(); void DefineAmdPerVertexAttribs();
void DefineWorkgroupIndex(); void DefineWorkgroupIndex();
[[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt, [[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt,
@ -279,8 +279,9 @@ public:
Id shared_memory_u32_type{}; Id shared_memory_u32_type{};
Id shared_memory_u64_type{}; Id shared_memory_u64_type{};
Id bary_coord_persp_id{}; Id bary_coord_smooth{};
Id bary_coord_linear_id{}; Id bary_coord_smooth_sample{};
Id bary_coord_nopersp{};
struct TextureDefinition { struct TextureDefinition {
const VectorIds* data_types; const VectorIds* data_types;
@ -355,12 +356,16 @@ public:
Id sampler_pointer_type{}; Id sampler_pointer_type{};
struct SpirvAttribute { struct SpirvAttribute {
Id id; union {
Id id;
std::array<Id, 3> id_array;
};
Id pointer_type; Id pointer_type;
Id component_type; Id component_type;
u32 num_components; u32 num_components;
bool is_integer{}; bool is_integer{};
bool is_loaded{}; bool is_loaded{};
bool is_array{};
}; };
Id input_attr_array; Id input_attr_array;
Id output_attr_array; Id output_attr_array;
@ -390,7 +395,7 @@ private:
void DefineFunctions(); void DefineFunctions();
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components, SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components,
bool output); bool output, bool loaded = false, bool array = false);
BufferSpv DefineBuffer(bool is_storage, bool is_written, u32 elem_shift, BufferType buffer_type, BufferSpv DefineBuffer(bool is_storage, bool is_written, u32 elem_shift, BufferType buffer_type,
Id data_type); Id data_type);

View File

@ -3,8 +3,6 @@
#pragma once #pragma once
#include <limits>
#include "common/bit_field.h"
#include "shader_recompiler/frontend/opcodes.h" #include "shader_recompiler/frontend/opcodes.h"
namespace Shader::Gcn { namespace Shader::Gcn {

View File

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <magic_enum/magic_enum.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"
@ -680,9 +681,18 @@ void Translator::S_FF1_I32_B32(const GcnInst& inst) {
} }
void Translator::S_FF1_I32_B64(const GcnInst& inst) { void Translator::S_FF1_I32_B64(const GcnInst& inst) {
ASSERT(inst.src[0].field == OperandField::ScalarGPR); const auto src = [&] {
const IR::U32 result{ switch (inst.src[0].field) {
ir.BallotFindLsb(ir.Ballot(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))))}; case OperandField::ScalarGPR:
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
case OperandField::VccLo:
return ir.GetVcc();
default:
UNREACHABLE_MSG("unhandled operand type {}", magic_enum::enum_name(inst.src[0].field));
}
}();
const IR::U32 result{ir.BallotFindLsb(ir.Ballot(src))};
SetDst(inst.dst[0], result); SetDst(inst.dst[0], result);
} }

View File

@ -21,50 +21,39 @@
namespace Shader::Gcn { namespace Shader::Gcn {
Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_) static IR::VectorReg IterateBarycentrics(const RuntimeInfo& runtime_info, auto&& set_attribute) {
: info{info_}, runtime_info{runtime_info_}, profile{profile_}, if (runtime_info.stage != Stage::Fragment) {
next_vgpr_num{runtime_info.num_allocated_vgprs} { return IR::VectorReg::V0;
if (info.l_stage == LogicalStage::Fragment) {
dst_frag_vreg = GatherInterpQualifiers();
} }
}
IR::VectorReg Translator::GatherInterpQualifiers() {
u32 dst_vreg{}; u32 dst_vreg{};
if (runtime_info.fs_info.addr_flags.persp_sample_ena) { if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // I set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothSample, 0); // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // J set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothSample, 1); // J
info.has_perspective_interp = true;
} }
if (runtime_info.fs_info.addr_flags.persp_center_ena) { if (runtime_info.fs_info.addr_flags.persp_center_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // I set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmooth, 0); // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // J set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmooth, 1); // J
info.has_perspective_interp = true;
} }
if (runtime_info.fs_info.addr_flags.persp_centroid_ena) { if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // I set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothCentroid, 0); // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // J set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothCentroid, 1); // J
info.has_perspective_interp = true;
} }
if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) { if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
++dst_vreg; // I/W set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 0); // I/W
++dst_vreg; // J/W set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 1); // J/W
++dst_vreg; // 1/W set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 2); // 1/W
} }
if (runtime_info.fs_info.addr_flags.linear_sample_ena) { if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // I set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspSample, 0); // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // J set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspSample, 1); // J
info.has_linear_interp = true;
} }
if (runtime_info.fs_info.addr_flags.linear_center_ena) { if (runtime_info.fs_info.addr_flags.linear_center_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // I set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPersp, 0); // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // J set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPersp, 1); // J
info.has_linear_interp = true;
} }
if (runtime_info.fs_info.addr_flags.linear_centroid_ena) { if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // I set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspCentroid, 0); // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // J set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspCentroid, 1); // J
info.has_linear_interp = true;
} }
if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) { if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
++dst_vreg; ++dst_vreg;
@ -72,6 +61,14 @@ IR::VectorReg Translator::GatherInterpQualifiers() {
return IR::VectorReg(dst_vreg); return IR::VectorReg(dst_vreg);
} }
/// Constructs the translator and records, for every VGPR that IterateBarycentrics
/// reports, which barycentric attribute that VGPR carries.
Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
    : info{info_}, runtime_info{runtime_info_}, profile{profile_},
      next_vgpr_num{runtime_info.num_allocated_vgprs} {
    // Only the VGPR -> attribute mapping is needed here; the component index and
    // the register returned by IterateBarycentrics are not used.
    const auto record_attribute = [this](u32 vreg, IR::Attribute attrib, u32 /*comp*/) {
        vgpr_to_interp[vreg] = attrib;
    };
    IterateBarycentrics(runtime_info, record_attribute);
}
void Translator::EmitPrologue(IR::Block* first_block) { void Translator::EmitPrologue(IR::Block* first_block) {
ir = IR::IREmitter(*first_block, first_block->begin()); ir = IR::IREmitter(*first_block, first_block->begin());
@ -127,7 +124,10 @@ void Translator::EmitPrologue(IR::Block* first_block) {
} }
break; break;
case LogicalStage::Fragment: case LogicalStage::Fragment:
dst_vreg = dst_frag_vreg; dst_vreg =
IterateBarycentrics(runtime_info, [this](u32 vreg, IR::Attribute attrib, u32 comp) {
ir.SetVectorReg(IR::VectorReg(vreg), ir.GetAttribute(attrib, comp));
});
if (runtime_info.fs_info.addr_flags.pos_x_float_ena) { if (runtime_info.fs_info.addr_flags.pos_x_float_ena) {
if (runtime_info.fs_info.en_flags.pos_x_float_ena) { if (runtime_info.fs_info.en_flags.pos_x_float_ena) {
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0)); ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0));
@ -151,7 +151,8 @@ void Translator::EmitPrologue(IR::Block* first_block) {
} }
if (runtime_info.fs_info.addr_flags.pos_w_float_ena) { if (runtime_info.fs_info.addr_flags.pos_w_float_ena) {
if (runtime_info.fs_info.en_flags.pos_w_float_ena) { if (runtime_info.fs_info.en_flags.pos_w_float_ena) {
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 3)); ir.SetVectorReg(dst_vreg++,
ir.FPRecip(ir.GetAttribute(IR::Attribute::FragCoord, 3)));
} else { } else {
ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f)); ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f));
} }

View File

@ -265,6 +265,7 @@ public:
// Vector interpolation // Vector interpolation
// VINTRP // VINTRP
void V_INTERP_P1_F32(const GcnInst& inst);
void V_INTERP_P2_F32(const GcnInst& inst); void V_INTERP_P2_F32(const GcnInst& inst);
void V_INTERP_MOV_F32(const GcnInst& inst); void V_INTERP_MOV_F32(const GcnInst& inst);
@ -323,7 +324,6 @@ private:
void LogMissingOpcode(const GcnInst& inst); void LogMissingOpcode(const GcnInst& inst);
IR::VectorReg GetScratchVgpr(u32 offset); IR::VectorReg GetScratchVgpr(u32 offset);
IR::VectorReg GatherInterpQualifiers();
private: private:
IR::IREmitter ir; IR::IREmitter ir;
@ -332,8 +332,7 @@ private:
const Profile& profile; const Profile& profile;
u32 next_vgpr_num; u32 next_vgpr_num;
std::unordered_map<u32, IR::VectorReg> vgpr_map; std::unordered_map<u32, IR::VectorReg> vgpr_map;
std::array<IR::Interpolation, MaxInterpVgpr> vgpr_to_interp{}; std::array<IR::Attribute, MaxInterpVgpr> vgpr_to_interp{};
IR::VectorReg dst_frag_vreg{};
bool opcode_missing = false; bool opcode_missing = false;
}; };

View File

@ -5,11 +5,32 @@
namespace Shader::Gcn { namespace Shader::Gcn {
using Interpolation = Info::Interpolation;
/// Translates a barycentric coordinate attribute into its interpolation qualifier pair.
/// The primary qualifier is Smooth or NoPerspective; the auxiliary qualifier is None,
/// Centroid or Sample. Any other attribute is a programming error and trips
/// UNREACHABLE_MSG.
static Interpolation GetInterpolation(IR::Attribute attribute) {
    switch (attribute) {
    // Perspective-correct variants.
    case IR::Attribute::BaryCoordSmooth:
        return Interpolation{.primary = Qualifier::Smooth, .auxiliary = Qualifier::None};
    case IR::Attribute::BaryCoordSmoothCentroid:
        return Interpolation{.primary = Qualifier::Smooth, .auxiliary = Qualifier::Centroid};
    case IR::Attribute::BaryCoordSmoothSample:
        return Interpolation{.primary = Qualifier::Smooth, .auxiliary = Qualifier::Sample};
    // Linear (no perspective) variants.
    case IR::Attribute::BaryCoordNoPersp:
        return Interpolation{.primary = Qualifier::NoPerspective, .auxiliary = Qualifier::None};
    case IR::Attribute::BaryCoordNoPerspCentroid:
        return Interpolation{.primary = Qualifier::NoPerspective,
                             .auxiliary = Qualifier::Centroid};
    case IR::Attribute::BaryCoordNoPerspSample:
        return Interpolation{.primary = Qualifier::NoPerspective,
                             .auxiliary = Qualifier::Sample};
    default:
        UNREACHABLE_MSG("Unhandled barycentric attribute {}", NameOf(attribute));
    }
}
void Translator::EmitVectorInterpolation(const GcnInst& inst) { void Translator::EmitVectorInterpolation(const GcnInst& inst) {
switch (inst.opcode) { switch (inst.opcode) {
// VINTRP // VINTRP
case Opcode::V_INTERP_P1_F32: case Opcode::V_INTERP_P1_F32:
return; return V_INTERP_P1_F32(inst);
case Opcode::V_INTERP_P2_F32: case Opcode::V_INTERP_P2_F32:
return V_INTERP_P2_F32(inst); return V_INTERP_P2_F32(inst);
case Opcode::V_INTERP_MOV_F32: case Opcode::V_INTERP_MOV_F32:
@ -21,19 +42,57 @@ void Translator::EmitVectorInterpolation(const GcnInst& inst) {
// VINTRP // VINTRP
void Translator::V_INTERP_P1_F32(const GcnInst& inst) {
if (!profile.needs_manual_interpolation) {
return;
}
// VDST = P10 * VSRC + P0
const u32 attr_index = inst.control.vintrp.attr;
const IR::Attribute attrib = IR::Attribute::Param0 + attr_index;
const IR::F32 p0 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 0);
const IR::F32 p1 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 1);
const IR::F32 i = GetSrc<IR::F32>(inst.src[0]);
const IR::F32 result = ir.FPFma(ir.FPSub(p1, p0), i, p0);
SetDst(inst.dst[0], result);
}
void Translator::V_INTERP_P2_F32(const GcnInst& inst) { void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
const u32 attr_index = inst.control.vintrp.attr; const u32 attr_index = inst.control.vintrp.attr;
const auto& attr = runtime_info.fs_info.inputs.at(attr_index); const IR::Attribute attrib = IR::Attribute::Param0 + attr_index;
info.interp_qualifiers[attr_index] = vgpr_to_interp[inst.src[0].code]; const auto& attr = runtime_info.fs_info.inputs[attr_index];
const IR::Attribute attrib{IR::Attribute::Param0 + attr_index}; auto& interp = info.fs_interpolation[attr_index];
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); ASSERT(!attr.IsDefault() && !attr.is_flat);
if (!profile.needs_manual_interpolation) {
interp = GetInterpolation(vgpr_to_interp[inst.src[0].code]);
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
return;
}
// VDST = P20 * VSRC + VDST
const IR::F32 p0 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 0);
const IR::F32 p2 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 2);
const IR::F32 j = GetSrc<IR::F32>(inst.src[0]);
const IR::F32 result = ir.FPFma(ir.FPSub(p2, p0), j, GetSrc<IR::F32>(inst.dst[0]));
interp.primary = Qualifier::PerVertex;
SetDst(inst.dst[0], result);
} }
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) { void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
const u32 attr_index = inst.control.vintrp.attr; const u32 attr_index = inst.control.vintrp.attr;
const auto& attr = runtime_info.fs_info.inputs.at(attr_index); const IR::Attribute attrib = IR::Attribute::Param0 + attr_index;
const IR::Attribute attrib{IR::Attribute::Param0 + attr_index}; const auto& attr = runtime_info.fs_info.inputs[attr_index];
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); auto& interp = info.fs_interpolation[attr_index];
ASSERT(attr.is_flat);
if (profile.supports_amd_shader_explicit_vertex_parameter ||
(profile.supports_fragment_shader_barycentric &&
!profile.has_incomplete_fragment_shader_barycentric)) {
// VSRC 0=P10, 1=P20, 2=P0
interp.primary = Qualifier::PerVertex;
SetDst(inst.dst[0],
ir.GetAttribute(attrib, inst.control.vintrp.chan, (inst.src[0].code + 1) % 3));
} else {
interp.primary = Qualifier::Flat;
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
}
} }
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -1,5 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#pragma once #pragma once
#include <span> #include <span>
@ -135,6 +136,16 @@ struct PushData {
static_assert(sizeof(PushData) <= 128, static_assert(sizeof(PushData) <= 128,
"PushData size is greater than minimum size guaranteed by Vulkan spec"); "PushData size is greater than minimum size guaranteed by Vulkan spec");
/// Interpolation qualifier of a fragment shader input. The same enum is used for both the
/// `primary` and the `auxiliary` member of Info::Interpolation. The numeric values are
/// spelled out and match the implicit numbering.
enum class Qualifier : u8 {
    None = 0,          ///< Zero value: what a value-initialized Interpolation holds; also the
                       ///< auxiliary qualifier of the plain barycentric attributes.
    Smooth = 1,        ///< Primary qualifier of the BaryCoordSmooth* attributes.
    NoPerspective = 2, ///< Primary qualifier of the BaryCoordNoPersp* attributes.
    PerVertex = 3,     ///< Primary qualifier set when the translator reads per-vertex values.
    Flat = 4,          ///< Primary qualifier set by V_INTERP_MOV_F32 on the fallback path.
    Centroid = 5,      ///< Auxiliary qualifier of the *Centroid barycentric attributes.
    Sample = 6,        ///< Auxiliary qualifier of the *Sample barycentric attributes.
};
/** /**
* Contains general information generated by the shader recompiler for an input program. * Contains general information generated by the shader recompiler for an input program.
*/ */
@ -194,7 +205,11 @@ struct Info {
PersistentSrtInfo srt_info; PersistentSrtInfo srt_info;
std::vector<u32> flattened_ud_buf; std::vector<u32> flattened_ud_buf;
std::array<IR::Interpolation, 32> interp_qualifiers{}; struct Interpolation {
Qualifier primary;
Qualifier auxiliary;
};
std::array<Interpolation, IR::NumParams> fs_interpolation{};
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max; IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
s32 tess_consts_dword_offset = -1; s32 tess_consts_dword_offset = -1;
@ -207,10 +222,9 @@ struct Info {
VAddr pgm_base; VAddr pgm_base;
bool has_storage_images{}; bool has_storage_images{};
bool has_discard{}; bool has_discard{};
bool has_bitwise_xor{};
bool has_image_gather{}; bool has_image_gather{};
bool has_image_query{}; bool has_image_query{};
bool has_perspective_interp{};
bool has_linear_interp{};
bool uses_buffer_atomic_float_min_max{}; bool uses_buffer_atomic_float_min_max{};
bool uses_image_atomic_float_min_max{}; bool uses_image_atomic_float_min_max{};
bool uses_lane_id{}; bool uses_lane_id{};

View File

@ -130,6 +130,20 @@ std::string NameOf(Attribute attribute) {
return "LocalInvocationIndex"; return "LocalInvocationIndex";
case Attribute::FragCoord: case Attribute::FragCoord:
return "FragCoord"; return "FragCoord";
case Attribute::BaryCoordNoPersp:
return "BaryCoordNoPersp";
case Attribute::BaryCoordNoPerspCentroid:
return "BaryCoordNoPerspCentroid";
case Attribute::BaryCoordNoPerspSample:
return "BaryCoordNoPerspSample";
case Attribute::BaryCoordSmooth:
return "BaryCoordSmooth";
case Attribute::BaryCoordSmoothCentroid:
return "BaryCoordSmoothCentroid";
case Attribute::BaryCoordSmoothSample:
return "BaryCoordSmoothSample";
case Attribute::BaryCoordPullModel:
return "BaryCoordPullModel";
case Attribute::InvocationId: case Attribute::InvocationId:
return "InvocationId"; return "InvocationId";
case Attribute::PatchVertices: case Attribute::PatchVertices:

View File

@ -73,24 +73,21 @@ enum class Attribute : u64 {
LocalInvocationId = 76, LocalInvocationId = 76,
LocalInvocationIndex = 77, LocalInvocationIndex = 77,
FragCoord = 78, FragCoord = 78,
InvocationId = 81, // TCS id in output patch and instanced geometry shader id BaryCoordNoPersp = 79,
PatchVertices = 82, BaryCoordNoPerspCentroid = 80,
TessellationEvaluationPointU = 83, BaryCoordNoPerspSample = 81,
TessellationEvaluationPointV = 84, BaryCoordSmooth = 82,
PackedHullInvocationInfo = 85, // contains patch id within the VGT and invocation ID BaryCoordSmoothCentroid = 83,
BaryCoordSmoothSample = 84,
BaryCoordPullModel = 85,
InvocationId = 86, // TCS id in output patch and instanced geometry shader id
PatchVertices = 87,
TessellationEvaluationPointU = 88,
TessellationEvaluationPointV = 89,
PackedHullInvocationInfo = 90, // contains patch id within the VGT and invocation ID
Max, Max,
}; };
/// Interpolation mode recorded per interpolation VGPR.
/// The numbering keeps the perspective modes (1-3) and the linear modes (4-6) in two
/// contiguous ranges, which IsPerspective() and IsLinear() rely on for their range checks.
enum class Interpolation {
// Zero value, i.e. what a value-initialized entry holds.
Invalid = 0,
// Perspective modes: must stay contiguous from PerspectiveSample to PerspectiveCentroid.
PerspectiveSample = 1,
PerspectiveCenter = 2,
PerspectiveCentroid = 3,
// Linear modes: must stay contiguous from LinearSample to LinearCentroid.
LinearSample = 4,
LinearCenter = 5,
LinearCentroid = 6,
};
constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max); constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
constexpr size_t NumRenderTargets = 8; constexpr size_t NumRenderTargets = 8;
constexpr size_t NumParams = 32; constexpr size_t NumParams = 32;
@ -112,13 +109,9 @@ constexpr bool IsMrt(Attribute attribute) noexcept {
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7; return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
} }
constexpr bool IsLinear(Interpolation interp) noexcept { constexpr bool IsBarycentricCoord(Attribute attribute) noexcept {
return interp >= Interpolation::LinearSample && interp <= Interpolation::LinearCentroid; return attribute >= Attribute::BaryCoordNoPersp &&
} attribute <= Attribute::BaryCoordSmoothSample;
constexpr bool IsPerspective(Interpolation interp) noexcept {
return interp >= Interpolation::PerspectiveSample &&
interp <= Interpolation::PerspectiveCentroid;
} }
[[nodiscard]] std::string NameOf(Attribute attribute); [[nodiscard]] std::string NameOf(Attribute attribute);

View File

@ -455,11 +455,12 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
// Read image sharp. // Read image sharp.
const auto tsharp = TrackSharp(tsharp_handle, info); const auto tsharp = TrackSharp(tsharp_handle, info);
const auto inst_info = inst.Flags<IR::TextureInstInfo>(); const auto inst_info = inst.Flags<IR::TextureInstInfo>();
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite; const bool is_atomic = IsImageAtomicInstruction(inst);
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite || is_atomic;
const ImageResource image_res = { const ImageResource image_res = {
.sharp_idx = tsharp, .sharp_idx = tsharp,
.is_depth = bool(inst_info.is_depth), .is_depth = bool(inst_info.is_depth),
.is_atomic = IsImageAtomicInstruction(inst), .is_atomic = is_atomic,
.is_array = bool(inst_info.is_array), .is_array = bool(inst_info.is_array),
.is_written = is_written, .is_written = is_written,
.is_r128 = bool(inst_info.is_r128), .is_r128 = bool(inst_info.is_r128),

View File

@ -95,6 +95,9 @@ void Visit(Info& info, const IR::Inst& inst) {
case IR::Opcode::DiscardCond: case IR::Opcode::DiscardCond:
info.has_discard = true; info.has_discard = true;
break; break;
case IR::Opcode::BitwiseXor32:
info.has_bitwise_xor = true;
break;
case IR::Opcode::ImageGather: case IR::Opcode::ImageGather:
case IR::Opcode::ImageGatherDref: case IR::Opcode::ImageGatherDref:
info.has_image_gather = true; info.has_image_gather = true;

View File

@ -10,16 +10,10 @@ namespace Shader {
struct Profile { struct Profile {
u32 supported_spirv{0x00010000}; u32 supported_spirv{0x00010000};
u32 subgroup_size{}; u32 subgroup_size{};
bool unified_descriptor_binding{};
bool support_descriptor_aliasing{};
bool support_int8{}; bool support_int8{};
bool support_int16{}; bool support_int16{};
bool support_int64{}; bool support_int64{};
bool support_float64{}; bool support_float64{};
bool support_vertex_instance_id{};
bool support_float_controls{};
bool support_separate_denorm_behavior{};
bool support_separate_rounding_mode{};
bool support_fp32_denorm_preserve{}; bool support_fp32_denorm_preserve{};
bool support_fp32_denorm_flush{}; bool support_fp32_denorm_flush{};
bool support_fp32_round_to_zero{}; bool support_fp32_round_to_zero{};
@ -33,6 +27,9 @@ struct Profile {
bool supports_buffer_int64_atomics{}; bool supports_buffer_int64_atomics{};
bool supports_shared_int64_atomics{}; bool supports_shared_int64_atomics{};
bool supports_workgroup_explicit_memory_layout{}; bool supports_workgroup_explicit_memory_layout{};
bool supports_amd_shader_explicit_vertex_parameter{};
bool supports_fragment_shader_barycentric{};
bool has_incomplete_fragment_shader_barycentric{};
bool has_broken_spirv_clamp{}; bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{}; bool lower_left_origin_mode{};
bool needs_manual_interpolation{}; bool needs_manual_interpolation{};

View File

@ -1009,7 +1009,6 @@ struct Liverpool {
return RemapSwizzle(info.format, mrt_swizzle); return RemapSwizzle(info.format, mrt_swizzle);
} }
private:
[[nodiscard]] NumberFormat GetFixedNumberFormat() const { [[nodiscard]] NumberFormat GetFixedNumberFormat() const {
// There is a small difference between T# and CB number types, account for it. // There is a small difference between T# and CB number types, account for it.
return info.number_type == NumberFormat::SnormNz ? NumberFormat::Srgb return info.number_type == NumberFormat::SnormNz ? NumberFormat::Srgb

View File

@ -807,8 +807,8 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat
vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
const auto comp_swizzle = color_buffer.Swizzle(); const auto comp_swizzle = color_buffer.Swizzle();
const auto format = color_buffer.GetDataFmt(); const auto format = color_buffer.info.format.Value();
const auto number_type = color_buffer.GetNumberFmt(); const auto number_type = color_buffer.GetFixedNumberFormat();
const auto& c0 = color_buffer.clear_word0; const auto& c0 = color_buffer.clear_word0;
const auto& c1 = color_buffer.clear_word1; const auto& c1 = color_buffer.clear_word1;

View File

@ -137,34 +137,18 @@ GraphicsPipeline::GraphicsPipeline(
const vk::PipelineMultisampleStateCreateInfo multisampling = { const vk::PipelineMultisampleStateCreateInfo multisampling = {
.rasterizationSamples = .rasterizationSamples =
LiverpoolToVK::NumSamples(key.num_samples, instance.GetFramebufferSampleCounts()), LiverpoolToVK::NumSamples(key.num_samples, instance.GetFramebufferSampleCounts()),
.sampleShadingEnable = false, .sampleShadingEnable =
fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena,
}; };
const vk::DepthClampRangeEXT depth_clamp_range = { const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = {
.minDepthClamp = key.min_depth_clamp, .negativeOneToOne = key.clip_space == Liverpool::ClipSpace::MinusWToW,
.maxDepthClamp = key.max_depth_clamp,
}; };
vk::StructureChain viewport_chain = { const vk::PipelineViewportStateCreateInfo viewport_info = {
vk::PipelineViewportStateCreateInfo{}, .pNext = instance.IsDepthClipControlSupported() ? &clip_control : nullptr,
vk::PipelineViewportDepthClipControlCreateInfoEXT{
.negativeOneToOne = key.clip_space == Liverpool::ClipSpace::MinusWToW,
},
vk::PipelineViewportDepthClampControlCreateInfoEXT{
.depthClampMode = key.depth_clamp_user_defined_range
? vk::DepthClampModeEXT::eUserDefinedRange
: vk::DepthClampModeEXT::eViewportRange,
.pDepthClampRange = &depth_clamp_range,
},
}; };
if (!instance.IsDepthClampControlSupported()) {
viewport_chain.unlink<vk::PipelineViewportDepthClampControlCreateInfoEXT>();
}
if (!instance.IsDepthClipControlSupported()) {
viewport_chain.unlink<vk::PipelineViewportDepthClipControlCreateInfoEXT>();
}
boost::container::static_vector<vk::DynamicState, 32> dynamic_states = { boost::container::static_vector<vk::DynamicState, 32> dynamic_states = {
vk::DynamicState::eViewportWithCount, vk::DynamicState::eScissorWithCount, vk::DynamicState::eViewportWithCount, vk::DynamicState::eScissorWithCount,
vk::DynamicState::eBlendConstants, vk::DynamicState::eDepthTestEnable, vk::DynamicState::eBlendConstants, vk::DynamicState::eDepthTestEnable,
@ -339,7 +323,7 @@ GraphicsPipeline::GraphicsPipeline(
.pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr, .pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
.pInputAssemblyState = &input_assembly, .pInputAssemblyState = &input_assembly,
.pTessellationState = &tessellation_state, .pTessellationState = &tessellation_state,
.pViewportState = &viewport_chain.get(), .pViewportState = &viewport_info,
.pRasterizationState = &raster_chain.get(), .pRasterizationState = &raster_chain.get(),
.pMultisampleState = &multisampling, .pMultisampleState = &multisampling,
.pColorBlendState = &color_blending, .pColorBlendState = &color_blending,

View File

@ -48,9 +48,6 @@ struct GraphicsPipelineKey {
Liverpool::DepthBuffer::ZFormat z_format : 2; Liverpool::DepthBuffer::ZFormat z_format : 2;
Liverpool::DepthBuffer::StencilFormat stencil_format : 1; Liverpool::DepthBuffer::StencilFormat stencil_format : 1;
u32 depth_clamp_enable : 1; u32 depth_clamp_enable : 1;
u32 depth_clamp_user_defined_range : 1;
float min_depth_clamp;
float max_depth_clamp;
}; };
struct { struct {
AmdGpu::PrimitiveType prim_type : 5; AmdGpu::PrimitiveType prim_type : 5;

View File

@ -271,10 +271,14 @@ bool Instance::CreateDevice() {
custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
depth_clip_enable = add_extension(VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME); depth_clip_enable = add_extension(VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME);
depth_clamp_control = add_extension(VK_EXT_DEPTH_CLAMP_CONTROL_EXTENSION_NAME);
vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME); amd_shader_explicit_vertex_parameter =
add_extension(VK_AMD_SHADER_EXPLICIT_VERTEX_PARAMETER_EXTENSION_NAME);
if (!amd_shader_explicit_vertex_parameter) {
fragment_shader_barycentric =
add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);
}
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME); legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
provoking_vertex = add_extension(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); provoking_vertex = add_extension(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME); shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
@ -426,9 +430,6 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceDepthClipEnableFeaturesEXT{ vk::PhysicalDeviceDepthClipEnableFeaturesEXT{
.depthClipEnable = true, .depthClipEnable = true,
}, },
vk::PhysicalDeviceDepthClampControlFeaturesEXT{
.depthClampControl = true,
},
vk::PhysicalDeviceRobustness2FeaturesEXT{ vk::PhysicalDeviceRobustness2FeaturesEXT{
.robustBufferAccess2 = robustness2_features.robustBufferAccess2, .robustBufferAccess2 = robustness2_features.robustBufferAccess2,
.robustImageAccess2 = robustness2_features.robustImageAccess2, .robustImageAccess2 = robustness2_features.robustImageAccess2,
@ -504,9 +505,6 @@ bool Instance::CreateDevice() {
if (!depth_clip_enable) { if (!depth_clip_enable) {
device_chain.unlink<vk::PhysicalDeviceDepthClipEnableFeaturesEXT>(); device_chain.unlink<vk::PhysicalDeviceDepthClipEnableFeaturesEXT>();
} }
if (!depth_clamp_control) {
device_chain.unlink<vk::PhysicalDeviceDepthClampControlFeaturesEXT>();
}
if (!robustness2) { if (!robustness2) {
device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>(); device_chain.unlink<vk::PhysicalDeviceRobustness2FeaturesEXT>();
} }

View File

@ -114,11 +114,6 @@ public:
return depth_clip_enable; return depth_clip_enable;
} }
/// Returns true when VK_EXT_depth_clamp_control is supported.
/// The flag holds the result of add_extension() for that extension in CreateDevice();
/// the graphics pipeline uses it to decide whether to keep the depth clamp control
/// structure in the viewport state chain.
bool IsDepthClampControlSupported() const {
return depth_clamp_control;
}
/// Returns true when VK_EXT_depth_range_unrestricted is supported /// Returns true when VK_EXT_depth_range_unrestricted is supported
bool IsDepthRangeUnrestrictedSupported() const { bool IsDepthRangeUnrestrictedSupported() const {
return depth_range_unrestricted; return depth_range_unrestricted;
@ -150,6 +145,11 @@ public:
return fragment_shader_barycentric; return fragment_shader_barycentric;
} }
/// Returns true when VK_AMD_shader_explicit_vertex_parameter is supported.
/// The flag holds the result of add_extension() for that extension in CreateDevice().
/// When it is set, CreateDevice() does not request VK_KHR_fragment_shader_barycentric.
bool IsAmdShaderExplicitVertexParameterSupported() const {
return amd_shader_explicit_vertex_parameter;
}
/// Returns true when VK_EXT_primitive_topology_list_restart is supported. /// Returns true when VK_EXT_primitive_topology_list_restart is supported.
bool IsListRestartSupported() const { bool IsListRestartSupported() const {
return list_restart; return list_restart;
@ -418,9 +418,9 @@ private:
u32 queue_family_index{0}; u32 queue_family_index{0};
bool custom_border_color{}; bool custom_border_color{};
bool fragment_shader_barycentric{}; bool fragment_shader_barycentric{};
bool amd_shader_explicit_vertex_parameter{};
bool depth_clip_control{}; bool depth_clip_control{};
bool depth_clip_enable{}; bool depth_clip_enable{};
bool depth_clamp_control{};
bool depth_range_unrestricted{}; bool depth_range_unrestricted{};
bool dynamic_state_3{}; bool dynamic_state_3{};
bool vertex_input_dynamic_state{}; bool vertex_input_dynamic_state{};

View File

@ -220,6 +220,12 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.supports_shared_int64_atomics = instance_.IsSharedInt64AtomicsSupported(), .supports_shared_int64_atomics = instance_.IsSharedInt64AtomicsSupported(),
.supports_workgroup_explicit_memory_layout = .supports_workgroup_explicit_memory_layout =
instance_.IsWorkgroupMemoryExplicitLayoutSupported(), instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
.supports_amd_shader_explicit_vertex_parameter =
instance_.IsAmdShaderExplicitVertexParameterSupported(),
.supports_fragment_shader_barycentric = instance_.IsFragmentShaderBarycentricSupported(),
.has_incomplete_fragment_shader_barycentric =
instance_.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eMoltenvk,
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
@ -290,6 +296,7 @@ bool PipelineCache::RefreshGraphicsKey() {
key.stencil_format = regs.depth_buffer.StencilValid() key.stencil_format = regs.depth_buffer.StencilValid()
? regs.depth_buffer.stencil_info.format.Value() ? regs.depth_buffer.stencil_info.format.Value()
: Liverpool::DepthBuffer::StencilFormat::Invalid; : Liverpool::DepthBuffer::StencilFormat::Invalid;
key.depth_clamp_enable = !regs.depth_render_override.disable_viewport_clamp;
key.depth_clip_enable = regs.clipper_control.ZclipEnable(); key.depth_clip_enable = regs.clipper_control.ZclipEnable();
key.clip_space = regs.clipper_control.clip_space; key.clip_space = regs.clipper_control.clip_space;
key.provoking_vtx_last = regs.polygon_control.provoking_vtx_last; key.provoking_vtx_last = regs.polygon_control.provoking_vtx_last;
@ -298,8 +305,6 @@ bool PipelineCache::RefreshGraphicsKey() {
key.logic_op = regs.color_control.rop3; key.logic_op = regs.color_control.rop3;
key.num_samples = regs.NumSamples(); key.num_samples = regs.NumSamples();
RefreshDepthClampRange();
const bool skip_cb_binding = const bool skip_cb_binding =
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
@ -488,62 +493,6 @@ bool PipelineCache::RefreshGraphicsKey() {
return true; return true;
} }
void PipelineCache::RefreshDepthClampRange() {
auto& regs = liverpool->regs;
auto& key = graphics_key;
key.depth_clamp_enable = !regs.depth_render_override.disable_viewport_clamp;
if (key.z_format == Liverpool::DepthBuffer::ZFormat::Invalid || !key.depth_clamp_enable) {
return;
}
bool depth_clamp_can_use_viewport_range = true;
bool depth_clamp_is_same_on_all_viewports = true;
float zmin = std::numeric_limits<float>::max();
float zmax = std::numeric_limits<float>::max();
const auto& vp_ctl = regs.viewport_control;
for (u32 i = 0; i < Liverpool::NumViewports; i++) {
const auto& vp = regs.viewports[i];
const auto& vp_d = regs.viewport_depths[i];
if (vp.xscale == 0) {
continue;
}
const auto zoffset = vp_ctl.zoffset_enable ? vp.zoffset : 0.f;
const auto zscale = vp_ctl.zscale_enable ? vp.zscale : 1.f;
float min_depth;
float max_depth;
if (regs.clipper_control.clip_space == AmdGpu::Liverpool::ClipSpace::MinusWToW) {
min_depth = zoffset - zscale;
max_depth = zoffset + zscale;
} else {
min_depth = zoffset;
max_depth = zoffset + zscale;
}
if (zmin == std::numeric_limits<float>::max()) {
zmin = vp_d.zmin;
zmax = vp_d.zmax;
}
depth_clamp_is_same_on_all_viewports &= (zmin == vp_d.zmin && zmax == vp_d.zmax);
depth_clamp_can_use_viewport_range &= (min_depth == vp_d.zmin && max_depth == vp_d.zmax);
}
if (zmin == std::numeric_limits<float>::max()) {
return;
}
if (!depth_clamp_can_use_viewport_range && !depth_clamp_is_same_on_all_viewports) {
LOG_ERROR(Render_Vulkan,
"Viewport depth clamping configuration cannot be accurately emulated");
}
key.depth_clamp_user_defined_range = !depth_clamp_can_use_viewport_range;
if (key.depth_clamp_user_defined_range) {
key.min_depth_clamp = zmin;
key.max_depth_clamp = zmax;
}
}
bool PipelineCache::RefreshComputeKey() { bool PipelineCache::RefreshComputeKey() {
Shader::Backend::Bindings binding{}; Shader::Backend::Bindings binding{};
const auto& cs_pgm = liverpool->GetCsRegs(); const auto& cs_pgm = liverpool->GetCsRegs();

View File

@ -76,8 +76,6 @@ private:
bool RefreshGraphicsKey(); bool RefreshGraphicsKey();
bool RefreshComputeKey(); bool RefreshComputeKey();
void RefreshDepthClampRange();
void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx, void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,
std::string_view ext); std::string_view ext);
std::optional<std::vector<u32>> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx, std::optional<std::vector<u32>> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx,

View File

@ -503,9 +503,13 @@ bool Rasterizer::IsComputeMetaClear(const Pipeline* pipeline) {
return false; return false;
} }
// Most of the time when a metadata is updated with a shader it gets cleared. It means
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
// will need its full emulation anyways.
const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute); const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
// Assume if a shader reads and writes metas at the same time, it is a copy shader. // Assume if a shader reads metadata, it is a copy shader.
for (const auto& desc : info.buffers) { for (const auto& desc : info.buffers) {
const VAddr address = desc.GetSharp(info).base_address; const VAddr address = desc.GetSharp(info).base_address;
if (!desc.IsSpecial() && !desc.is_written && texture_cache.IsMeta(address)) { if (!desc.IsSpecial() && !desc.is_written && texture_cache.IsMeta(address)) {
@ -513,10 +517,15 @@ bool Rasterizer::IsComputeMetaClear(const Pipeline* pipeline) {
} }
} }
// Most of the time when a metadata is updated with a shader it gets cleared. It means // Metadata surfaces are tiled and thus need address calculation to be written properly.
// we can skip the whole dispatch and update the tracked state instead. Also, it is not // If a shader wants to encode HTILE, for example, from a depth image it will have to compute
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we // proper tile address from dispatch invocation id. This address calculation contains an xor
// will need its full emulation anyways. // operation so use it as a heuristic for metadata writes that are probably not clears.
if (info.has_bitwise_xor) {
return false;
}
// Assume if a shader writes metadata without address calculation, it is a clear shader.
for (const auto& desc : info.buffers) { for (const auto& desc : info.buffers) {
const VAddr address = desc.GetSharp(info).base_address; const VAddr address = desc.GetSharp(info).base_address;
if (!desc.IsSpecial() && desc.is_written && texture_cache.ClearMeta(address)) { if (!desc.IsSpecial() && desc.is_written && texture_cache.ClearMeta(address)) {

View File

@ -27,10 +27,9 @@ enum ImageFlagBits : u32 {
CpuDirty = 1 << 1, ///< Contents have been modified from the CPU CpuDirty = 1 << 1, ///< Contents have been modified from the CPU
GpuDirty = 1 << 2, ///< Contents have been modified from the GPU (valid data in buffer cache) GpuDirty = 1 << 2, ///< Contents have been modified from the GPU (valid data in buffer cache)
Dirty = MaybeCpuDirty | CpuDirty | GpuDirty, Dirty = MaybeCpuDirty | CpuDirty | GpuDirty,
GpuModified = 1 << 3, ///< Contents have been modified from the GPU GpuModified = 1 << 3, ///< Contents have been modified from the GPU
Registered = 1 << 6, ///< True when the image is registered Registered = 1 << 6, ///< True when the image is registered
Picked = 1 << 7, ///< Temporary flag to mark the image as picked Picked = 1 << 7, ///< Temporary flag to mark the image as picked
MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered
}; };
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)

View File

@ -508,20 +508,16 @@ ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) {
UpdateImage(image_id); UpdateImage(image_id);
// Register meta data for this color buffer // Register meta data for this color buffer
if (!(image.flags & ImageFlagBits::MetaRegistered)) { if (desc.info.meta_info.cmask_addr) {
if (desc.info.meta_info.cmask_addr) { surface_metas.emplace(desc.info.meta_info.cmask_addr,
surface_metas.emplace(desc.info.meta_info.cmask_addr, MetaDataInfo{.type = MetaDataInfo::Type::CMask});
MetaDataInfo{.type = MetaDataInfo::Type::CMask}); image.info.meta_info.cmask_addr = desc.info.meta_info.cmask_addr;
image.info.meta_info.cmask_addr = desc.info.meta_info.cmask_addr; }
image.flags |= ImageFlagBits::MetaRegistered;
}
if (desc.info.meta_info.fmask_addr) { if (desc.info.meta_info.fmask_addr) {
surface_metas.emplace(desc.info.meta_info.fmask_addr, surface_metas.emplace(desc.info.meta_info.fmask_addr,
MetaDataInfo{.type = MetaDataInfo::Type::FMask}); MetaDataInfo{.type = MetaDataInfo::Type::FMask});
image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr; image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr;
image.flags |= ImageFlagBits::MetaRegistered;
}
} }
return RegisterImageView(image_id, desc.view_info); return RegisterImageView(image_id, desc.view_info);
@ -536,15 +532,11 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
UpdateImage(image_id); UpdateImage(image_id);
// Register meta data for this depth buffer // Register meta data for this depth buffer
if (!(image.flags & ImageFlagBits::MetaRegistered)) { if (desc.info.meta_info.htile_addr) {
if (desc.info.meta_info.htile_addr) { surface_metas.emplace(desc.info.meta_info.htile_addr,
surface_metas.emplace( MetaDataInfo{.type = MetaDataInfo::Type::HTile,
desc.info.meta_info.htile_addr, .clear_mask = image.info.meta_info.htile_clear_mask});
MetaDataInfo{.type = MetaDataInfo::Type::HTile, image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr;
.clear_mask = image.info.meta_info.htile_clear_mask});
image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr;
image.flags |= ImageFlagBits::MetaRegistered;
}
} }
// If there is a stencil attachment, link depth and stencil. // If there is a stencil attachment, link depth and stencil.

View File

@ -161,10 +161,12 @@ public:
/// Registers an image view for provided image /// Registers an image view for provided image
ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info); ImageView& RegisterImageView(ImageId image_id, const ImageViewInfo& view_info);
/// Returns true if the specified address is a metadata surface.
bool IsMeta(VAddr address) const { bool IsMeta(VAddr address) const {
return surface_metas.contains(address); return surface_metas.contains(address);
} }
/// Returns true if a slice of the specified metadata surface has been cleared.
bool IsMetaCleared(VAddr address, u32 slice) const { bool IsMetaCleared(VAddr address, u32 slice) const {
const auto& it = surface_metas.find(address); const auto& it = surface_metas.find(address);
if (it != surface_metas.end()) { if (it != surface_metas.end()) {
@ -173,6 +175,7 @@ public:
return false; return false;
} }
/// Clears all slices of the specified metadata surface.
bool ClearMeta(VAddr address) { bool ClearMeta(VAddr address) {
auto it = surface_metas.find(address); auto it = surface_metas.find(address);
if (it != surface_metas.end()) { if (it != surface_metas.end()) {
@ -182,6 +185,7 @@ public:
return false; return false;
} }
/// Updates the state of a slice of the specified metadata surface.
bool TouchMeta(VAddr address, u32 slice, bool is_clear) { bool TouchMeta(VAddr address, u32 slice, bool is_clear) {
auto it = surface_metas.find(address); auto it = surface_metas.find(address);
if (it != surface_metas.end()) { if (it != surface_metas.end()) {