mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-10 05:38:49 +00:00
shader_recompiler: Improve shader exports accuracy (part 1) (#3447)
* video_core: support for RT layer outputs
  - support for RT layer outputs
  - refactor for handling of export attributes
  - move output->attribute mapping to a separate header
* export: Rework render target exports
  - Centralize all code related to MRT exports into a single function to make it easier to follow
  - Apply swizzle to output RGBA colors instead of the render target channel. This fixes swizzles on formats with < 4 channels. For example, with render target format R8_UNORM and COMP_SWAP ALT_REV, the previous code would output `frag_color.a = color.r;` instead of `frag_color.r = color.a;`, which would result in incorrect output in some cases.
* vk_pipeline_cache: Apply swizzle to write masks

---------

Co-authored-by: polyproxy <47796739+polybiusproxy@users.noreply.github.com>
This commit is contained in:
@@ -272,6 +272,9 @@ void SetupCapabilities(const Info& info, const Profile& profile, const RuntimeIn
|
|||||||
if (info.has_image_query) {
|
if (info.has_image_query) {
|
||||||
ctx.AddCapability(spv::Capability::ImageQuery);
|
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||||
}
|
}
|
||||||
|
if (info.has_layer_output) {
|
||||||
|
ctx.AddCapability(spv::Capability::ShaderLayer);
|
||||||
|
}
|
||||||
if ((info.uses_image_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) ||
|
if ((info.uses_image_atomic_float_min_max && profile.supports_image_fp32_atomic_min_max) ||
|
||||||
(info.uses_buffer_atomic_float_min_max && profile.supports_buffer_fp32_atomic_min_max)) {
|
(info.uses_buffer_atomic_float_min_max && profile.supports_buffer_fp32_atomic_min_max)) {
|
||||||
ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max");
|
ctx.AddExtension("SPV_EXT_shader_atomic_float_min_max");
|
||||||
|
|||||||
@@ -16,39 +16,6 @@
|
|||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
|
|
||||||
switch (output) {
|
|
||||||
case VsOutput::ClipDist0:
|
|
||||||
case VsOutput::ClipDist1:
|
|
||||||
case VsOutput::ClipDist2:
|
|
||||||
case VsOutput::ClipDist3:
|
|
||||||
case VsOutput::ClipDist4:
|
|
||||||
case VsOutput::ClipDist5:
|
|
||||||
case VsOutput::ClipDist6:
|
|
||||||
case VsOutput::ClipDist7: {
|
|
||||||
const u32 index = u32(output) - u32(VsOutput::ClipDist0);
|
|
||||||
const Id clip_num{ctx.ConstU32(index)};
|
|
||||||
ASSERT_MSG(Sirit::ValidId(ctx.clip_distances), "Clip distance used but not defined");
|
|
||||||
return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num);
|
|
||||||
}
|
|
||||||
case VsOutput::CullDist0:
|
|
||||||
case VsOutput::CullDist1:
|
|
||||||
case VsOutput::CullDist2:
|
|
||||||
case VsOutput::CullDist3:
|
|
||||||
case VsOutput::CullDist4:
|
|
||||||
case VsOutput::CullDist5:
|
|
||||||
case VsOutput::CullDist6:
|
|
||||||
case VsOutput::CullDist7: {
|
|
||||||
const u32 index = u32(output) - u32(VsOutput::CullDist0);
|
|
||||||
const Id cull_num{ctx.ConstU32(index)};
|
|
||||||
ASSERT_MSG(Sirit::ValidId(ctx.cull_distances), "Cull distance used but not defined");
|
|
||||||
return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num);
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
UNREACHABLE_MSG("Vertex output {}", u32(output));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
||||||
if (IR::IsParam(attr)) {
|
if (IR::IsParam(attr)) {
|
||||||
const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
|
const u32 attr_index{u32(attr) - u32(IR::Attribute::Param0)};
|
||||||
@@ -76,15 +43,14 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
switch (attr) {
|
switch (attr) {
|
||||||
case IR::Attribute::Position0: {
|
case IR::Attribute::Position0:
|
||||||
return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
|
return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, ctx.ConstU32(element));
|
||||||
}
|
case IR::Attribute::ClipDistance:
|
||||||
case IR::Attribute::Position1:
|
return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, ctx.ConstU32(element));
|
||||||
case IR::Attribute::Position2:
|
case IR::Attribute::CullDistance:
|
||||||
case IR::Attribute::Position3: {
|
return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, ctx.ConstU32(element));
|
||||||
const u32 index = u32(attr) - u32(IR::Attribute::Position1);
|
case IR::Attribute::RenderTargetId:
|
||||||
return VsOutputAttrPointer(ctx, ctx.runtime_info.vs_info.outputs[index][element]);
|
return ctx.output_layer;
|
||||||
}
|
|
||||||
case IR::Attribute::Depth:
|
case IR::Attribute::Depth:
|
||||||
return ctx.frag_depth;
|
return ctx.frag_depth;
|
||||||
default:
|
default:
|
||||||
@@ -105,11 +71,13 @@ std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr
|
|||||||
}
|
}
|
||||||
switch (attr) {
|
switch (attr) {
|
||||||
case IR::Attribute::Position0:
|
case IR::Attribute::Position0:
|
||||||
case IR::Attribute::Position1:
|
case IR::Attribute::ClipDistance:
|
||||||
case IR::Attribute::Position2:
|
case IR::Attribute::CullDistance:
|
||||||
case IR::Attribute::Position3:
|
|
||||||
case IR::Attribute::Depth:
|
case IR::Attribute::Depth:
|
||||||
return {ctx.F32[1], false};
|
return {ctx.F32[1], false};
|
||||||
|
case IR::Attribute::RenderTargetId:
|
||||||
|
case IR::Attribute::ViewportId:
|
||||||
|
return {ctx.S32[1], true};
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Write attribute {}", attr);
|
UNREACHABLE_MSG("Write attribute {}", attr);
|
||||||
}
|
}
|
||||||
@@ -270,14 +238,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
|
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
|
||||||
if (attr == IR::Attribute::Position1) {
|
|
||||||
LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const Id pointer{OutputAttrPointer(ctx, attr, element)};
|
const Id pointer{OutputAttrPointer(ctx, attr, element)};
|
||||||
const auto component_type{OutputAttrComponentType(ctx, attr)};
|
const auto [component_type, is_integer]{OutputAttrComponentType(ctx, attr)};
|
||||||
if (component_type.second) {
|
if (is_integer) {
|
||||||
ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));
|
ctx.OpStore(pointer, ctx.OpBitcast(component_type, value));
|
||||||
} else {
|
} else {
|
||||||
ctx.OpStore(pointer, value);
|
ctx.OpStore(pointer, value);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -539,24 +539,26 @@ void EmitContext::DefineInputs() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Declares the built-in output variables shared by the stages that call this helper
/// (DefineOutputs invokes it for its Vertex, TessellationEval and Geometry cases).
/// The Position output is always declared; the clip/cull distance arrays and the layer
/// output are declared only when the shader info records a store to the matching attribute.
void EmitContext::DefineVertexBlock() {
|
||||||
|
// Position builtin: declared unconditionally.
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||||
|
// ClipDistance builtin: 8-element F32[1] array, only if any ClipDistance element is stored.
if (info.stores.GetAny(IR::Attribute::ClipDistance)) {
|
||||||
|
clip_distances = DefineVariable(TypeArray(F32[1], ConstU32(8U)), spv::BuiltIn::ClipDistance,
|
||||||
|
spv::StorageClass::Output);
|
||||||
|
}
|
||||||
|
// CullDistance builtin: same array type, gated on CullDistance stores.
if (info.stores.GetAny(IR::Attribute::CullDistance)) {
|
||||||
|
cull_distances = DefineVariable(TypeArray(F32[1], ConstU32(8U)), spv::BuiltIn::CullDistance,
|
||||||
|
spv::StorageClass::Output);
|
||||||
|
}
|
||||||
|
// Layer builtin (S32[1]): only when the shader stores the RenderTargetId attribute.
if (info.stores.GetAny(IR::Attribute::RenderTargetId)) {
|
||||||
|
output_layer = DefineVariable(S32[1], spv::BuiltIn::Layer, spv::StorageClass::Output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void EmitContext::DefineOutputs() {
|
void EmitContext::DefineOutputs() {
|
||||||
switch (l_stage) {
|
switch (l_stage) {
|
||||||
case LogicalStage::Vertex: {
|
case LogicalStage::Vertex: {
|
||||||
// No point in defining builtin outputs (i.e. position) unless next stage is fragment?
|
DefineVertexBlock();
|
||||||
// Might cause problems linking with tcs
|
if (stage == Shader::Stage::Local) {
|
||||||
|
|
||||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
|
||||||
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
|
|
||||||
info.stores.Get(IR::Attribute::Position2) ||
|
|
||||||
info.stores.Get(IR::Attribute::Position3);
|
|
||||||
if (has_extra_pos_stores) {
|
|
||||||
const Id type{TypeArray(F32[1], ConstU32(8U))};
|
|
||||||
clip_distances =
|
|
||||||
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
|
|
||||||
cull_distances =
|
|
||||||
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
|
|
||||||
}
|
|
||||||
if (stage == Stage::Local) {
|
|
||||||
const u32 num_attrs = Common::AlignUp(runtime_info.ls_info.ls_stride, 16) >> 4;
|
const u32 num_attrs = Common::AlignUp(runtime_info.ls_info.ls_stride, 16) >> 4;
|
||||||
if (num_attrs > 0) {
|
if (num_attrs > 0) {
|
||||||
const Id type{TypeArray(F32[4], ConstU32(num_attrs))};
|
const Id type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||||
@@ -615,17 +617,7 @@ void EmitContext::DefineOutputs() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case LogicalStage::TessellationEval: {
|
case LogicalStage::TessellationEval: {
|
||||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
DefineVertexBlock();
|
||||||
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
|
|
||||||
info.stores.Get(IR::Attribute::Position2) ||
|
|
||||||
info.stores.Get(IR::Attribute::Position3);
|
|
||||||
if (has_extra_pos_stores) {
|
|
||||||
const Id type{TypeArray(F32[1], ConstU32(8U))};
|
|
||||||
clip_distances =
|
|
||||||
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
|
|
||||||
cull_distances =
|
|
||||||
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
|
|
||||||
}
|
|
||||||
for (u32 i = 0; i < IR::NumParams; i++) {
|
for (u32 i = 0; i < IR::NumParams; i++) {
|
||||||
const IR::Attribute param{IR::Attribute::Param0 + i};
|
const IR::Attribute param{IR::Attribute::Param0 + i};
|
||||||
if (!info.stores.GetAny(param)) {
|
if (!info.stores.GetAny(param)) {
|
||||||
@@ -665,8 +657,7 @@ void EmitContext::DefineOutputs() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case LogicalStage::Geometry: {
|
case LogicalStage::Geometry: {
|
||||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
DefineVertexBlock();
|
||||||
|
|
||||||
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
|
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
|
||||||
const Id id{DefineOutput(F32[4], attr_id)};
|
const Id id{DefineOutput(F32[4], attr_id)};
|
||||||
Name(id, fmt::format("out_attr{}", attr_id));
|
Name(id, fmt::format("out_attr{}", attr_id));
|
||||||
|
|||||||
@@ -245,6 +245,7 @@ public:
|
|||||||
boost::container::small_vector<Id, 16> interfaces;
|
boost::container::small_vector<Id, 16> interfaces;
|
||||||
|
|
||||||
Id output_position{};
|
Id output_position{};
|
||||||
|
Id output_layer{};
|
||||||
Id primitive_id{};
|
Id primitive_id{};
|
||||||
Id vertex_index{};
|
Id vertex_index{};
|
||||||
Id instance_id{};
|
Id instance_id{};
|
||||||
@@ -388,6 +389,7 @@ private:
|
|||||||
void DefineArithmeticTypes();
|
void DefineArithmeticTypes();
|
||||||
void DefineInterfaces();
|
void DefineInterfaces();
|
||||||
void DefineInputs();
|
void DefineInputs();
|
||||||
|
void DefineVertexBlock();
|
||||||
void DefineOutputs();
|
void DefineOutputs();
|
||||||
void DefinePushDataBlock();
|
void DefinePushDataBlock();
|
||||||
void DefineBuffers();
|
void DefineBuffers();
|
||||||
|
|||||||
@@ -2,134 +2,113 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include "shader_recompiler/frontend/translate/translate.h"
|
#include "shader_recompiler/frontend/translate/translate.h"
|
||||||
|
#include "shader_recompiler/ir/position.h"
|
||||||
#include "shader_recompiler/ir/reinterpret.h"
|
#include "shader_recompiler/ir/reinterpret.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
#include "shader_recompiler/runtime_info.h"
|
||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
|
|
||||||
u32 SwizzleMrtComponent(const PsColorBuffer& color_buffer, u32 comp) {
|
static AmdGpu::NumberFormat NumberFormatCompressed(
|
||||||
const auto [r, g, b, a] = color_buffer.swizzle;
|
AmdGpu::Liverpool::ShaderExportFormat export_format) {
|
||||||
const std::array swizzle_array = {r, g, b, a};
|
switch (export_format) {
|
||||||
const auto swizzled_comp_type = static_cast<u32>(swizzle_array[comp]);
|
|
||||||
constexpr auto min_comp_type = static_cast<u32>(AmdGpu::CompSwizzle::Red);
|
|
||||||
return swizzled_comp_type >= min_comp_type ? swizzled_comp_type - min_comp_type : comp;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
|
|
||||||
const PsColorBuffer& color_buffer) {
|
|
||||||
auto converted = ApplyWriteNumberConversion(ir, value, color_buffer.num_conversion);
|
|
||||||
if (color_buffer.needs_unorm_fixup) {
|
|
||||||
// FIXME: Fix-up for GPUs where float-to-unorm rounding is off from expected.
|
|
||||||
converted = ir.FPSub(converted, ir.Imm32(1.f / 127500.f));
|
|
||||||
}
|
|
||||||
ir.SetAttribute(attribute, converted, comp);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
|
|
||||||
u32 color_buffer_idx =
|
|
||||||
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
|
|
||||||
if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
|
|
||||||
color_buffer_idx = 0;
|
|
||||||
}
|
|
||||||
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
|
|
||||||
|
|
||||||
AmdGpu::NumberFormat num_format;
|
|
||||||
switch (color_buffer.export_format) {
|
|
||||||
case AmdGpu::Liverpool::ShaderExportFormat::Zero:
|
|
||||||
// No export
|
|
||||||
return;
|
|
||||||
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16:
|
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16:
|
||||||
num_format = AmdGpu::NumberFormat::Float;
|
return AmdGpu::NumberFormat::Float;
|
||||||
break;
|
|
||||||
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16:
|
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16:
|
||||||
num_format = AmdGpu::NumberFormat::Unorm;
|
return AmdGpu::NumberFormat::Unorm;
|
||||||
break;
|
|
||||||
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16:
|
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16:
|
||||||
num_format = AmdGpu::NumberFormat::Snorm;
|
return AmdGpu::NumberFormat::Snorm;
|
||||||
break;
|
|
||||||
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16:
|
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16:
|
||||||
num_format = AmdGpu::NumberFormat::Uint;
|
return AmdGpu::NumberFormat::Uint;
|
||||||
break;
|
|
||||||
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
|
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
|
||||||
num_format = AmdGpu::NumberFormat::Sint;
|
return AmdGpu::NumberFormat::Sint;
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Unimplemented compressed MRT export format {}",
|
UNREACHABLE_MSG("Unimplemented compressed MRT export format {}",
|
||||||
static_cast<u32>(color_buffer.export_format));
|
static_cast<u32>(export_format));
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto unpacked_value = ir.Unpack2x16(num_format, value);
|
|
||||||
const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
|
|
||||||
const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
|
|
||||||
|
|
||||||
const auto swizzled_r = SwizzleMrtComponent(color_buffer, idx * 2);
|
|
||||||
const auto swizzled_g = SwizzleMrtComponent(color_buffer, idx * 2 + 1);
|
|
||||||
|
|
||||||
ExportMrtValue(attribute, swizzled_r, r, color_buffer);
|
|
||||||
ExportMrtValue(attribute, swizzled_g, g, color_buffer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
|
static u32 MaskFromExportFormat(u8 mask, AmdGpu::Liverpool::ShaderExportFormat export_format) {
|
||||||
u32 color_buffer_idx =
|
switch (export_format) {
|
||||||
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
|
|
||||||
if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
|
|
||||||
color_buffer_idx = 0;
|
|
||||||
}
|
|
||||||
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
|
|
||||||
const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);
|
|
||||||
|
|
||||||
switch (color_buffer.export_format) {
|
|
||||||
case AmdGpu::Liverpool::ShaderExportFormat::Zero:
|
|
||||||
// No export
|
|
||||||
return;
|
|
||||||
case AmdGpu::Liverpool::ShaderExportFormat::R_32:
|
case AmdGpu::Liverpool::ShaderExportFormat::R_32:
|
||||||
// Red only
|
// Red only
|
||||||
if (swizzled_comp != 0) {
|
return mask & 1;
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case AmdGpu::Liverpool::ShaderExportFormat::GR_32:
|
case AmdGpu::Liverpool::ShaderExportFormat::GR_32:
|
||||||
// Red and Green only
|
// Red and Green only
|
||||||
if (swizzled_comp != 0 && swizzled_comp != 1) {
|
return mask & 3;
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case AmdGpu::Liverpool::ShaderExportFormat::AR_32:
|
case AmdGpu::Liverpool::ShaderExportFormat::AR_32:
|
||||||
// Red and Alpha only
|
// Red and Alpha only
|
||||||
if (swizzled_comp != 0 && swizzled_comp != 3) {
|
return mask & 9;
|
||||||
return;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_32:
|
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_32:
|
||||||
// All components
|
// All components
|
||||||
break;
|
return mask;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Unimplemented uncompressed MRT export format {}",
|
UNREACHABLE_MSG("Unimplemented uncompressed MRT export format {}",
|
||||||
static_cast<u32>(color_buffer.export_format));
|
static_cast<u32>(export_format));
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
ExportMrtValue(attribute, swizzled_comp, value, color_buffer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
|
void Translator::ExportRenderTarget(const GcnInst& inst) {
|
||||||
if (IsMrt(attribute)) {
|
const auto& exp = inst.control.exp;
|
||||||
ExportMrtCompressed(attribute, idx, value);
|
const IR::Attribute mrt{exp.target};
|
||||||
return;
|
info.mrt_mask |= 1u << static_cast<u8>(mrt);
|
||||||
}
|
|
||||||
const IR::Value unpacked_value = ir.Unpack2x16(AmdGpu::NumberFormat::Float, value);
|
|
||||||
const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
|
|
||||||
const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
|
|
||||||
ir.SetAttribute(attribute, r, idx * 2);
|
|
||||||
ir.SetAttribute(attribute, g, idx * 2 + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Translator::ExportUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
|
// Dual source blending uses MRT1 for exporting src1
|
||||||
if (IsMrt(attribute)) {
|
u32 color_buffer_idx = static_cast<u32>(mrt) - static_cast<u32>(IR::Attribute::RenderTarget0);
|
||||||
ExportMrtUncompressed(attribute, comp, value);
|
if (runtime_info.fs_info.dual_source_blending && mrt == IR::Attribute::RenderTarget1) {
|
||||||
|
color_buffer_idx = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
|
||||||
|
if (color_buffer.export_format == AmdGpu::Liverpool::ShaderExportFormat::Zero || exp.en == 0) {
|
||||||
|
// No export
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ir.SetAttribute(attribute, value, comp);
|
|
||||||
|
std::array<IR::F32, 4> components{};
|
||||||
|
if (exp.compr) {
|
||||||
|
// Components are float16 packed into a VGPR
|
||||||
|
const auto num_format = NumberFormatCompressed(color_buffer.export_format);
|
||||||
|
// Export R, G
|
||||||
|
if (exp.en & 1) {
|
||||||
|
const IR::Value unpacked_value =
|
||||||
|
ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[0].code)));
|
||||||
|
components[0] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
|
||||||
|
components[1] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
|
||||||
|
}
|
||||||
|
// Export B, A
|
||||||
|
if ((exp.en >> 2) & 1) {
|
||||||
|
const IR::Value unpacked_value =
|
||||||
|
ir.Unpack2x16(num_format, ir.GetVectorReg(IR::VectorReg(inst.src[1].code)));
|
||||||
|
components[2] = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
|
||||||
|
components[3] = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Components are float32 into separate VGPRS
|
||||||
|
u32 mask = MaskFromExportFormat(exp.en, color_buffer.export_format);
|
||||||
|
for (u32 i = 0; i < 4; i++, mask >>= 1) {
|
||||||
|
if ((mask & 1) == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
components[i] = ir.GetVectorReg<IR::F32>(IR::VectorReg(inst.src[i].code));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Swizzle components and export
|
||||||
|
for (u32 i = 0; i < 4; ++i) {
|
||||||
|
const u32 comp_swizzle = static_cast<u32>(color_buffer.swizzle.array[i]);
|
||||||
|
constexpr u32 min_swizzle = static_cast<u32>(AmdGpu::CompSwizzle::Red);
|
||||||
|
const auto swizzled_comp =
|
||||||
|
components[comp_swizzle >= min_swizzle ? comp_swizzle - min_swizzle : i];
|
||||||
|
if (swizzled_comp.IsEmpty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto converted = ApplyWriteNumberConversion(ir, swizzled_comp, color_buffer.num_conversion);
|
||||||
|
if (color_buffer.needs_unorm_fixup) {
|
||||||
|
// FIXME: Fix-up for GPUs where float-to-unorm rounding is off from expected.
|
||||||
|
converted = ir.FPSub(converted, ir.Imm32(1.f / 127500.f));
|
||||||
|
}
|
||||||
|
ir.SetAttribute(mrt, converted, i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::EmitExport(const GcnInst& inst) {
|
void Translator::EmitExport(const GcnInst& inst) {
|
||||||
@@ -139,40 +118,27 @@ void Translator::EmitExport(const GcnInst& inst) {
|
|||||||
|
|
||||||
const auto& exp = inst.control.exp;
|
const auto& exp = inst.control.exp;
|
||||||
const IR::Attribute attrib{exp.target};
|
const IR::Attribute attrib{exp.target};
|
||||||
|
if (IR::IsMrt(attrib)) {
|
||||||
|
return ExportRenderTarget(inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_MSG(!exp.compr, "Compressed exports only supported for render targets");
|
||||||
if (attrib == IR::Attribute::Depth && exp.en != 0 && exp.en != 1) {
|
if (attrib == IR::Attribute::Depth && exp.en != 0 && exp.en != 1) {
|
||||||
LOG_WARNING(Render_Vulkan, "Unsupported depth export");
|
LOG_WARNING(Render_Vulkan, "Unsupported depth export");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::array vsrc = {
|
u32 mask = exp.en;
|
||||||
IR::VectorReg(inst.src[0].code),
|
for (u32 i = 0; i < 4; i++, mask >>= 1) {
|
||||||
IR::VectorReg(inst.src[1].code),
|
if ((mask & 1) == 0) {
|
||||||
IR::VectorReg(inst.src[2].code),
|
continue;
|
||||||
IR::VectorReg(inst.src[3].code),
|
|
||||||
};
|
|
||||||
|
|
||||||
// Components are float16 packed into a VGPR
|
|
||||||
if (exp.compr) {
|
|
||||||
// Export R, G
|
|
||||||
if (exp.en & 1) {
|
|
||||||
ExportCompressed(attrib, 0, ir.GetVectorReg<IR::U32>(vsrc[0]));
|
|
||||||
}
|
}
|
||||||
// Export B, A
|
const auto value = ir.GetVectorReg<IR::F32>(IR::VectorReg(inst.src[i].code));
|
||||||
if ((exp.en >> 2) & 1) {
|
if (IsPosition(attrib)) {
|
||||||
ExportCompressed(attrib, 1, ir.GetVectorReg<IR::U32>(vsrc[1]));
|
IR::ExportPosition(ir, runtime_info.vs_info, attrib, i, value);
|
||||||
|
} else {
|
||||||
|
ir.SetAttribute(attrib, value, i);
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// Components are float32 into separate VGPRS
|
|
||||||
u32 mask = exp.en;
|
|
||||||
for (u32 i = 0; i < 4; i++, mask >>= 1) {
|
|
||||||
if ((mask & 1) == 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
ExportUncompressed(attrib, i, ir.GetVectorReg<IR::F32>(vsrc[i]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (IR::IsMrt(attrib)) {
|
|
||||||
info.mrt_mask |= 1u << u8(attrib);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -317,13 +317,7 @@ private:
|
|||||||
IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
|
IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
|
||||||
const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
|
const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
|
||||||
|
|
||||||
void ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
|
void ExportRenderTarget(const GcnInst& inst);
|
||||||
const PsColorBuffer& color_buffer);
|
|
||||||
void ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
|
|
||||||
void ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
|
|
||||||
void ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
|
|
||||||
void ExportUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
|
|
||||||
|
|
||||||
void LogMissingOpcode(const GcnInst& inst);
|
void LogMissingOpcode(const GcnInst& inst);
|
||||||
|
|
||||||
IR::VectorReg GetScratchVgpr(u32 offset);
|
IR::VectorReg GetScratchVgpr(u32 offset);
|
||||||
|
|||||||
@@ -210,6 +210,7 @@ struct Info {
|
|||||||
bool has_bitwise_xor{};
|
bool has_bitwise_xor{};
|
||||||
bool has_image_gather{};
|
bool has_image_gather{};
|
||||||
bool has_image_query{};
|
bool has_image_query{};
|
||||||
|
bool has_layer_output{};
|
||||||
bool uses_buffer_atomic_float_min_max{};
|
bool uses_buffer_atomic_float_min_max{};
|
||||||
bool uses_image_atomic_float_min_max{};
|
bool uses_image_atomic_float_min_max{};
|
||||||
bool uses_lane_id{};
|
bool uses_lane_id{};
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "shader_recompiler/ir/ir_emitter.h"
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
#include "shader_recompiler/ir/opcodes.h"
|
#include "shader_recompiler/ir/opcodes.h"
|
||||||
|
#include "shader_recompiler/ir/position.h"
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
#include "shader_recompiler/ir/reg.h"
|
#include "shader_recompiler/ir/reg.h"
|
||||||
#include "shader_recompiler/recompiler.h"
|
#include "shader_recompiler/recompiler.h"
|
||||||
@@ -142,11 +143,12 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
|||||||
ASSERT(it != info.gs_copy_data.attr_map.cend());
|
ASSERT(it != info.gs_copy_data.attr_map.cend());
|
||||||
const auto& [attr, comp] = it->second;
|
const auto& [attr, comp] = it->second;
|
||||||
|
|
||||||
inst.ReplaceOpcode(IR::Opcode::SetAttribute);
|
inst.Invalidate();
|
||||||
inst.ClearArgs();
|
if (IsPosition(attr)) {
|
||||||
inst.SetArg(0, IR::Value{attr});
|
ExportPosition(ir, runtime_info.gs_info, attr, comp, data);
|
||||||
inst.SetArg(1, data);
|
} else {
|
||||||
inst.SetArg(2, ir.Imm32(comp));
|
ir.SetAttribute(attr, data, comp);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -160,6 +160,10 @@ void CollectShaderInfoPass(IR::Program& program, const Profile& profile) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (info.stores.GetAny(IR::Attribute::RenderTargetId)) {
|
||||||
|
info.has_layer_output = true;
|
||||||
|
}
|
||||||
|
|
||||||
// In case Flatbuf has not already been bound by IR and is needed
|
// In case Flatbuf has not already been bound by IR and is needed
|
||||||
// to query buffer sizes, bind it now.
|
// to query buffer sizes, bind it now.
|
||||||
if (!profile.supports_robust_buffer_access && !info.uses_dma) {
|
if (!profile.supports_robust_buffer_access && !info.uses_dma) {
|
||||||
|
|||||||
53
src/shader_recompiler/ir/position.h
Normal file
53
src/shader_recompiler/ir/position.h
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
|
#include "shader_recompiler/runtime_info.h"
|
||||||
|
|
||||||
|
namespace Shader::IR {
|
||||||
|
|
||||||
|
/// Maps special position export to builtin attribute stores.
///
/// Position0 is stored unchanged. For any other position attribute, the per-stage
/// `outputs` table is consulted to find what the exported component actually carries,
/// and the store is redirected to the ClipDistance, CullDistance or RenderTargetId
/// attribute accordingly.
///
/// ir        - emitter used to issue the SetAttribute store(s)
/// stage     - any object exposing `outputs[index][comp]` (callers pass a stage runtime info)
/// attribute - position attribute being exported
/// comp      - component of the export being written
/// value     - value to store
|
||||||
|
inline void ExportPosition(IREmitter& ir, const auto& stage, Attribute attribute, u32 comp,
|
||||||
|
const IR::F32& value) {
|
||||||
|
// Position0 needs no remapping: store the component directly.
if (attribute == Attribute::Position0) {
|
||||||
|
ir.SetAttribute(attribute, value, comp);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Row of the output map for this attribute, counted from Position1.
const u32 index = u32(attribute) - u32(Attribute::Position1);
|
||||||
|
// What this component of the export is configured to carry.
const auto output = stage.outputs[index][comp];
|
||||||
|
switch (output) {
|
||||||
|
case Output::ClipDist0:
|
||||||
|
case Output::ClipDist1:
|
||||||
|
case Output::ClipDist2:
|
||||||
|
case Output::ClipDist3:
|
||||||
|
case Output::ClipDist4:
|
||||||
|
case Output::ClipDist5:
|
||||||
|
case Output::ClipDist6:
|
||||||
|
case Output::ClipDist7: {
|
||||||
|
// Element within the ClipDistance attribute; assumes ClipDist0..ClipDist7 are
|
// consecutive enumerators - TODO confirm against the Output enum.
|
// NOTE(review): this inner `index` shadows the outer `index` declared above.
const u32 index = u32(output) - u32(Output::ClipDist0);
|
||||||
|
ir.SetAttribute(IR::Attribute::ClipDistance, value, index);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Output::CullDist0:
|
||||||
|
case Output::CullDist1:
|
||||||
|
case Output::CullDist2:
|
||||||
|
case Output::CullDist3:
|
||||||
|
case Output::CullDist4:
|
||||||
|
case Output::CullDist5:
|
||||||
|
case Output::CullDist6:
|
||||||
|
case Output::CullDist7: {
|
||||||
|
// Same scheme as the clip distances, relative to CullDist0 (also shadows outer `index`).
const u32 index = u32(output) - u32(Output::CullDist0);
|
||||||
|
ir.SetAttribute(IR::Attribute::CullDistance, value, index);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Output::GsMrtIndex:
|
||||||
|
// Redirected to the RenderTargetId attribute; no component index is passed here.
ir.SetAttribute(IR::Attribute::RenderTargetId, value);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// Every other Output value is treated as unreachable for a position export.
UNREACHABLE_MSG("Unhandled output {} on attribute {}", u32(output), u32(attribute));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Shader::IR
|
||||||
@@ -52,7 +52,7 @@ struct ExportRuntimeInfo {
|
|||||||
auto operator<=>(const ExportRuntimeInfo&) const noexcept = default;
|
auto operator<=>(const ExportRuntimeInfo&) const noexcept = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class VsOutput : u8 {
|
enum class Output : u8 {
|
||||||
None,
|
None,
|
||||||
PointSprite,
|
PointSprite,
|
||||||
EdgeFlag,
|
EdgeFlag,
|
||||||
@@ -77,11 +77,11 @@ enum class VsOutput : u8 {
|
|||||||
ClipDist6,
|
ClipDist6,
|
||||||
ClipDist7,
|
ClipDist7,
|
||||||
};
|
};
|
||||||
using VsOutputMap = std::array<VsOutput, 4>;
|
using OutputMap = std::array<Output, 4>;
|
||||||
|
|
||||||
struct VertexRuntimeInfo {
|
struct VertexRuntimeInfo {
|
||||||
u32 num_outputs;
|
u32 num_outputs;
|
||||||
std::array<VsOutputMap, 3> outputs;
|
std::array<OutputMap, 3> outputs;
|
||||||
bool emulate_depth_negative_one_to_one{};
|
bool emulate_depth_negative_one_to_one{};
|
||||||
bool clip_disable{};
|
bool clip_disable{};
|
||||||
u32 step_rate_0;
|
u32 step_rate_0;
|
||||||
@@ -145,6 +145,8 @@ struct HullRuntimeInfo {
|
|||||||
static constexpr auto GsMaxOutputStreams = 4u;
|
static constexpr auto GsMaxOutputStreams = 4u;
|
||||||
using GsOutputPrimTypes = std::array<AmdGpu::GsOutputPrimitiveType, GsMaxOutputStreams>;
|
using GsOutputPrimTypes = std::array<AmdGpu::GsOutputPrimitiveType, GsMaxOutputStreams>;
|
||||||
struct GeometryRuntimeInfo {
|
struct GeometryRuntimeInfo {
|
||||||
|
u32 num_outputs;
|
||||||
|
std::array<OutputMap, 3> outputs;
|
||||||
u32 num_invocations{};
|
u32 num_invocations{};
|
||||||
u32 output_vertices{};
|
u32 output_vertices{};
|
||||||
u32 in_vertex_data_size{};
|
u32 in_vertex_data_size{};
|
||||||
@@ -179,7 +181,7 @@ struct PsColorBuffer {
|
|||||||
u32 pad : 20;
|
u32 pad : 20;
|
||||||
AmdGpu::CompMapping swizzle;
|
AmdGpu::CompMapping swizzle;
|
||||||
|
|
||||||
auto operator<=>(const PsColorBuffer&) const noexcept = default;
|
bool operator==(const PsColorBuffer& other) const noexcept = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FragmentRuntimeInfo {
|
struct FragmentRuntimeInfo {
|
||||||
@@ -189,11 +191,11 @@ struct FragmentRuntimeInfo {
|
|||||||
bool is_flat;
|
bool is_flat;
|
||||||
u8 default_value;
|
u8 default_value;
|
||||||
|
|
||||||
[[nodiscard]] bool IsDefault() const {
|
bool IsDefault() const {
|
||||||
return is_default && !is_flat;
|
return is_default && !is_flat;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto operator<=>(const PsInput&) const noexcept = default;
|
bool operator==(const PsInput&) const noexcept = default;
|
||||||
};
|
};
|
||||||
AmdGpu::Liverpool::PsInput en_flags;
|
AmdGpu::Liverpool::PsInput en_flags;
|
||||||
AmdGpu::Liverpool::PsInput addr_flags;
|
AmdGpu::Liverpool::PsInput addr_flags;
|
||||||
|
|||||||
@@ -104,13 +104,18 @@ enum class NumberConversion : u32 {
|
|||||||
Uint32ToUnorm = 6,
|
Uint32ToUnorm = 6,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CompMapping {
|
union CompMapping {
|
||||||
CompSwizzle r;
|
struct {
|
||||||
CompSwizzle g;
|
CompSwizzle r;
|
||||||
CompSwizzle b;
|
CompSwizzle g;
|
||||||
CompSwizzle a;
|
CompSwizzle b;
|
||||||
|
CompSwizzle a;
|
||||||
|
};
|
||||||
|
std::array<CompSwizzle, 4> array;
|
||||||
|
|
||||||
auto operator<=>(const CompMapping& other) const = default;
|
bool operator==(const CompMapping& other) const {
|
||||||
|
return array == other.array;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
||||||
|
|||||||
@@ -412,6 +412,7 @@ bool Instance::CreateDevice() {
|
|||||||
.hostQueryReset = vk12_features.hostQueryReset,
|
.hostQueryReset = vk12_features.hostQueryReset,
|
||||||
.timelineSemaphore = vk12_features.timelineSemaphore,
|
.timelineSemaphore = vk12_features.timelineSemaphore,
|
||||||
.bufferDeviceAddress = vk12_features.bufferDeviceAddress,
|
.bufferDeviceAddress = vk12_features.bufferDeviceAddress,
|
||||||
|
.shaderOutputLayer = vk12_features.shaderOutputLayer,
|
||||||
},
|
},
|
||||||
vk::PhysicalDeviceVulkan13Features{
|
vk::PhysicalDeviceVulkan13Features{
|
||||||
.robustImageAccess = vk13_features.robustImageAccess,
|
.robustImageAccess = vk13_features.robustImageAccess,
|
||||||
|
|||||||
@@ -23,8 +23,8 @@ extern std::unique_ptr<Vulkan::Presenter> presenter;
|
|||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
using Shader::LogicalStage;
|
using Shader::LogicalStage;
|
||||||
|
using Shader::Output;
|
||||||
using Shader::Stage;
|
using Shader::Stage;
|
||||||
using Shader::VsOutput;
|
|
||||||
|
|
||||||
constexpr static auto SpirvVersion1_6 = 0x00010600U;
|
constexpr static auto SpirvVersion1_6 = 0x00010600U;
|
||||||
|
|
||||||
@@ -35,49 +35,55 @@ constexpr static std::array DescriptorHeapSizes = {
|
|||||||
vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024},
|
vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024},
|
||||||
};
|
};
|
||||||
|
|
||||||
void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
|
static u32 MapOutputs(std::span<Shader::OutputMap, 3> outputs,
|
||||||
const AmdGpu::Liverpool::VsOutputControl& ctl) {
|
const AmdGpu::Liverpool::VsOutputControl& ctl) {
|
||||||
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
|
u32 num_outputs = 0;
|
||||||
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
|
|
||||||
w != VsOutput::None) {
|
if (ctl.vs_out_misc_enable) {
|
||||||
info.outputs[info.num_outputs++] = Shader::VsOutputMap{x, y, z, w};
|
auto& misc_vec = outputs[num_outputs++];
|
||||||
}
|
misc_vec[0] = ctl.use_vtx_point_size ? Output::PointSprite : Output::None;
|
||||||
};
|
misc_vec[1] = ctl.use_vtx_edge_flag
|
||||||
// VS_OUT_MISC_VEC
|
? Output::EdgeFlag
|
||||||
add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
|
: (ctl.use_vtx_gs_cut_flag ? Output::GsCutFlag : Output::None);
|
||||||
ctl.use_vtx_edge_flag
|
misc_vec[2] = ctl.use_vtx_kill_flag
|
||||||
? VsOutput::EdgeFlag
|
? Output::KillFlag
|
||||||
: (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
|
: (ctl.use_vtx_render_target_idx ? Output::GsMrtIndex : Output::None);
|
||||||
ctl.use_vtx_kill_flag
|
misc_vec[3] = ctl.use_vtx_viewport_idx ? Output::GsVpIndex : Output::None;
|
||||||
? VsOutput::KillFlag
|
}
|
||||||
: (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
|
|
||||||
ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
|
if (ctl.vs_out_ccdist0_enable) {
|
||||||
// VS_OUT_CCDIST0
|
auto& ccdist0 = outputs[num_outputs++];
|
||||||
add_output(ctl.IsClipDistEnabled(0)
|
ccdist0[0] = ctl.IsClipDistEnabled(0)
|
||||||
? VsOutput::ClipDist0
|
? Output::ClipDist0
|
||||||
: (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
|
: (ctl.IsCullDistEnabled(0) ? Output::CullDist0 : Output::None);
|
||||||
ctl.IsClipDistEnabled(1)
|
ccdist0[1] = ctl.IsClipDistEnabled(1)
|
||||||
? VsOutput::ClipDist1
|
? Output::ClipDist1
|
||||||
: (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
|
: (ctl.IsCullDistEnabled(1) ? Output::CullDist1 : Output::None);
|
||||||
ctl.IsClipDistEnabled(2)
|
ccdist0[2] = ctl.IsClipDistEnabled(2)
|
||||||
? VsOutput::ClipDist2
|
? Output::ClipDist2
|
||||||
: (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
|
: (ctl.IsCullDistEnabled(2) ? Output::CullDist2 : Output::None);
|
||||||
ctl.IsClipDistEnabled(3)
|
ccdist0[3] = ctl.IsClipDistEnabled(3)
|
||||||
? VsOutput::ClipDist3
|
? Output::ClipDist3
|
||||||
: (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
|
: (ctl.IsCullDistEnabled(3) ? Output::CullDist3 : Output::None);
|
||||||
// VS_OUT_CCDIST1
|
}
|
||||||
add_output(ctl.IsClipDistEnabled(4)
|
|
||||||
? VsOutput::ClipDist4
|
if (ctl.vs_out_ccdist1_enable) {
|
||||||
: (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
|
auto& ccdist1 = outputs[num_outputs++];
|
||||||
ctl.IsClipDistEnabled(5)
|
ccdist1[0] = ctl.IsClipDistEnabled(4)
|
||||||
? VsOutput::ClipDist5
|
? Output::ClipDist4
|
||||||
: (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
|
: (ctl.IsCullDistEnabled(4) ? Output::CullDist4 : Output::None);
|
||||||
ctl.IsClipDistEnabled(6)
|
ccdist1[1] = ctl.IsClipDistEnabled(5)
|
||||||
? VsOutput::ClipDist6
|
? Output::ClipDist5
|
||||||
: (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
|
: (ctl.IsCullDistEnabled(5) ? Output::CullDist5 : Output::None);
|
||||||
ctl.IsClipDistEnabled(7)
|
ccdist1[2] = ctl.IsClipDistEnabled(6)
|
||||||
? VsOutput::ClipDist7
|
? Output::ClipDist6
|
||||||
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
|
: (ctl.IsCullDistEnabled(6) ? Output::CullDist6 : Output::None);
|
||||||
|
ccdist1[3] = ctl.IsClipDistEnabled(7)
|
||||||
|
? Output::ClipDist7
|
||||||
|
: (ctl.IsCullDistEnabled(7) ? Output::CullDist7 : Output::None);
|
||||||
|
}
|
||||||
|
|
||||||
|
return num_outputs;
|
||||||
}
|
}
|
||||||
|
|
||||||
const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
|
const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
|
||||||
@@ -116,9 +122,9 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
|||||||
}
|
}
|
||||||
case Stage::Vertex: {
|
case Stage::Vertex: {
|
||||||
BuildCommon(regs.vs_program);
|
BuildCommon(regs.vs_program);
|
||||||
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
|
||||||
info.vs_info.step_rate_0 = regs.vgt_instance_step_rate_0;
|
info.vs_info.step_rate_0 = regs.vgt_instance_step_rate_0;
|
||||||
info.vs_info.step_rate_1 = regs.vgt_instance_step_rate_1;
|
info.vs_info.step_rate_1 = regs.vgt_instance_step_rate_1;
|
||||||
|
info.vs_info.num_outputs = MapOutputs(info.vs_info.outputs, regs.vs_output_control);
|
||||||
info.vs_info.emulate_depth_negative_one_to_one =
|
info.vs_info.emulate_depth_negative_one_to_one =
|
||||||
!instance.IsDepthClipControlSupported() &&
|
!instance.IsDepthClipControlSupported() &&
|
||||||
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
|
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
|
||||||
@@ -133,6 +139,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
|||||||
case Stage::Geometry: {
|
case Stage::Geometry: {
|
||||||
BuildCommon(regs.gs_program);
|
BuildCommon(regs.gs_program);
|
||||||
auto& gs_info = info.gs_info;
|
auto& gs_info = info.gs_info;
|
||||||
|
gs_info.num_outputs = MapOutputs(gs_info.outputs, regs.vs_output_control);
|
||||||
gs_info.output_vertices = regs.vgt_gs_max_vert_out;
|
gs_info.output_vertices = regs.vgt_gs_max_vert_out;
|
||||||
gs_info.num_invocations =
|
gs_info.num_invocations =
|
||||||
regs.vgt_gs_instance_cnt.IsEnabled() ? regs.vgt_gs_instance_cnt.count : 1;
|
regs.vgt_gs_instance_cnt.IsEnabled() ? regs.vgt_gs_instance_cnt.count : 1;
|
||||||
@@ -466,7 +473,8 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!regs.color_target_mask.GetMask(cb) || (key.mrt_mask & (1u << cb)) == 0) {
|
const u32 target_mask = regs.color_target_mask.GetMask(cb);
|
||||||
|
if (!target_mask || (key.mrt_mask & (1u << cb)) == 0) {
|
||||||
// Attachment is masked out by either color_target_mask or shader mrt_mask. In the case
|
// Attachment is masked out by either color_target_mask or shader mrt_mask. In the case
|
||||||
// of the latter we need to change format to undefined, and either way we need to
|
// of the latter we need to change format to undefined, and either way we need to
|
||||||
// increment the index for the null attachment binding.
|
// increment the index for the null attachment binding.
|
||||||
@@ -477,7 +485,16 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||||||
key.blend_controls[remapped_cb] = regs.blend_control[cb];
|
key.blend_controls[remapped_cb] = regs.blend_control[cb];
|
||||||
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
|
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
|
||||||
!col_buf.info.blend_bypass);
|
!col_buf.info.blend_bypass);
|
||||||
key.write_masks[remapped_cb] = vk::ColorComponentFlags{regs.color_target_mask.GetMask(cb)};
|
// Apply swizzle to target mask
|
||||||
|
for (u32 i = 0; i < 4; i++) {
|
||||||
|
if (target_mask & (1 << i)) {
|
||||||
|
const auto swizzled_comp =
|
||||||
|
static_cast<u32>(key.color_buffers[remapped_cb].swizzle.array[i]);
|
||||||
|
constexpr u32 min_comp = static_cast<u32>(AmdGpu::CompSwizzle::Red);
|
||||||
|
const u32 comp = swizzled_comp >= min_comp ? swizzled_comp - min_comp : i;
|
||||||
|
key.write_masks[remapped_cb] |= vk::ColorComponentFlagBits{1u << comp};
|
||||||
|
}
|
||||||
|
}
|
||||||
key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb));
|
key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb));
|
||||||
++remapped_cb;
|
++remapped_cb;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -112,6 +112,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
|
|||||||
RenderState state;
|
RenderState state;
|
||||||
state.width = instance.GetMaxFramebufferWidth();
|
state.width = instance.GetMaxFramebufferWidth();
|
||||||
state.height = instance.GetMaxFramebufferHeight();
|
state.height = instance.GetMaxFramebufferHeight();
|
||||||
|
state.num_layers = std::numeric_limits<u32>::max();
|
||||||
|
|
||||||
cb_descs.clear();
|
cb_descs.clear();
|
||||||
db_desc.reset();
|
db_desc.reset();
|
||||||
@@ -161,6 +162,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
|
|||||||
const auto mip = image_view.info.range.base.level;
|
const auto mip = image_view.info.range.base.level;
|
||||||
state.width = std::min<u32>(state.width, std::max(image.info.size.width >> mip, 1u));
|
state.width = std::min<u32>(state.width, std::max(image.info.size.width >> mip, 1u));
|
||||||
state.height = std::min<u32>(state.height, std::max(image.info.size.height >> mip, 1u));
|
state.height = std::min<u32>(state.height, std::max(image.info.size.height >> mip, 1u));
|
||||||
|
state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
|
||||||
state.color_attachments[state.num_color_attachments++] = {
|
state.color_attachments[state.num_color_attachments++] = {
|
||||||
.imageView = *image_view.image_view,
|
.imageView = *image_view.image_view,
|
||||||
.imageLayout = vk::ImageLayout::eUndefined,
|
.imageLayout = vk::ImageLayout::eUndefined,
|
||||||
@@ -194,6 +196,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
|
|||||||
state.height = std::min<u32>(state.height, image.info.size.height);
|
state.height = std::min<u32>(state.height, image.info.size.height);
|
||||||
state.has_depth = regs.depth_buffer.DepthValid();
|
state.has_depth = regs.depth_buffer.DepthValid();
|
||||||
state.has_stencil = regs.depth_buffer.StencilValid();
|
state.has_stencil = regs.depth_buffer.StencilValid();
|
||||||
|
state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
|
||||||
if (state.has_depth) {
|
if (state.has_depth) {
|
||||||
state.depth_attachment = {
|
state.depth_attachment = {
|
||||||
.imageView = *image_view.image_view,
|
.imageView = *image_view.image_view,
|
||||||
@@ -217,6 +220,10 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
|
|||||||
texture_cache.TouchMeta(htile_address, slice, false);
|
texture_cache.TouchMeta(htile_address, slice, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (state.num_layers == std::numeric_limits<u32>::max()) {
|
||||||
|
state.num_layers = 1;
|
||||||
|
}
|
||||||
|
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/debug.h"
|
#include "common/debug.h"
|
||||||
|
#include "common/logging/log.h"
|
||||||
#include "imgui/renderer/texture_manager.h"
|
#include "imgui/renderer/texture_manager.h"
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
@@ -40,7 +41,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
|
|||||||
.offset = {0, 0},
|
.offset = {0, 0},
|
||||||
.extent = {render_state.width, render_state.height},
|
.extent = {render_state.width, render_state.height},
|
||||||
},
|
},
|
||||||
.layerCount = 1,
|
.layerCount = render_state.num_layers,
|
||||||
.colorAttachmentCount = render_state.num_color_attachments,
|
.colorAttachmentCount = render_state.num_color_attachments,
|
||||||
.pColorAttachments = render_state.num_color_attachments > 0
|
.pColorAttachments = render_state.num_color_attachments > 0
|
||||||
? render_state.color_attachments.data()
|
? render_state.color_attachments.data()
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ struct RenderState {
|
|||||||
vk::RenderingAttachmentInfo depth_attachment{};
|
vk::RenderingAttachmentInfo depth_attachment{};
|
||||||
vk::RenderingAttachmentInfo stencil_attachment{};
|
vk::RenderingAttachmentInfo stencil_attachment{};
|
||||||
u32 num_color_attachments{};
|
u32 num_color_attachments{};
|
||||||
|
u32 num_layers{1};
|
||||||
bool has_depth{};
|
bool has_depth{};
|
||||||
bool has_stencil{};
|
bool has_stencil{};
|
||||||
u32 width{};
|
u32 width{};
|
||||||
|
|||||||
Reference in New Issue
Block a user