diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp index 0969b91d6..5325470b7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.cpp @@ -130,21 +130,37 @@ struct QuadRectListEmitter : public Sirit::Module { } /// Emits a passthrough quad tessellation control shader that outputs 4 control points. - void EmitPassthroughTCS() { + void EmitQuadListTCS() { DefineEntry(spv::ExecutionModel::TessellationControl); + const Id array_type{TypeArray(int_id, Int(4))}; + const Id values{ConstantComposite(array_type, Int(1), Int(2), Int(0), Int(3))}; + const Id indices{AddLocalVariable(TypePointer(spv::StorageClass::Function, array_type), + spv::StorageClass::Function, values)}; + + // Set passthrough tessellation factors + const Id output_float{TypePointer(spv::StorageClass::Output, float_id)}; + for (int i = 0; i < 4; i++) { + const Id ptr{OpAccessChain(output_float, gl_tess_level_outer, Int(i))}; + OpStore(ptr, float_one); + } + for (int i = 0; i < 2; i++) { + const Id ptr{OpAccessChain(output_float, gl_tess_level_inner, Int(i))}; + OpStore(ptr, float_one); + } const Id input_vec4{TypePointer(spv::StorageClass::Input, vec4_id)}; const Id output_vec4{TypePointer(spv::StorageClass::Output, vec4_id)}; + const Id func_int{TypePointer(spv::StorageClass::Function, int_id)}; const Id invocation_id{OpLoad(int_id, gl_invocation_id)}; + const Id index{OpLoad(int_id, OpAccessChain(func_int, indices, invocation_id))}; // gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position; - const Id in_position{ - OpLoad(vec4_id, OpAccessChain(input_vec4, gl_in, invocation_id, Int(0)))}; + const Id in_position{OpLoad(vec4_id, OpAccessChain(input_vec4, gl_in, index, Int(0)))}; OpStore(OpAccessChain(output_vec4, gl_out, invocation_id, Int(0)), in_position); for (int i = 0; i < num_attribs; i++) { // out_paramN[gl_InvocationID] = in_paramN[gl_InvocationID]; - const Id in_param{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], invocation_id))}; + const Id in_param{OpLoad(vec4_id, OpAccessChain(input_vec4, inputs[i], index))}; OpStore(OpAccessChain(output_vec4, outputs[i], invocation_id), in_param); } @@ -296,8 +312,8 @@ std::vector EmitAuxilaryTessShader(AuxShaderType type, size_t num_attribs) case AuxShaderType::RectListTCS: ctx.EmitRectListTCS(); break; - case AuxShaderType::PassthoughTCS: - ctx.EmitPassthroughTCS(); + case AuxShaderType::QuadListTCS: + ctx.EmitQuadListTCS(); break; case AuxShaderType::PassthroughTES: ctx.EmitPassthroughTES(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.h b/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.h index 4723a798d..35ac18a55 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_quad_rect.h @@ -10,7 +10,7 @@ namespace Shader::Backend::SPIRV { enum class AuxShaderType : u32 { RectListTCS, - PassthoughTCS, + QuadListTCS, PassthroughTES, }; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index f265fb68d..75f06365f 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -238,32 +238,14 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { // Emulate QuadList and Polygon primitive types with CPU made index buffer. const auto& regs = liverpool->regs; if (!is_indexed) { - bool needs_index_buffer = false; - if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList || - regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { - needs_index_buffer = true; - } - - if (!needs_index_buffer) { + if (regs.primitive_type != AmdGpu::PrimitiveType::Polygon) { return regs.num_indices; } // Emit indices. const u32 index_size = 3 * regs.num_indices; const auto [data, offset] = stream_buffer.Map(index_size); - - switch (regs.primitive_type) { - case AmdGpu::PrimitiveType::QuadList: - Vulkan::LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices); - break; - case AmdGpu::PrimitiveType::Polygon: - Vulkan::LiverpoolToVK::EmitPolygonToTriangleListIndices(data, regs.num_indices); - break; - default: - UNREACHABLE(); - break; - } - + Vulkan::LiverpoolToVK::EmitPolygonToTriangleListIndices(data, regs.num_indices); stream_buffer.Commit(); // Bind index buffer. @@ -282,31 +264,6 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { VAddr index_address = regs.index_base_address.Address(); index_address += index_offset * index_size; - if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) { - // Convert indices. - const u32 new_index_size = regs.num_indices * index_size * 6 / 4; - const auto [data, offset] = stream_buffer.Map(new_index_size); - const auto index_ptr = reinterpret_cast(index_address); - switch (index_type) { - case vk::IndexType::eUint16: - Vulkan::LiverpoolToVK::ConvertQuadToTriangleListIndices(data, index_ptr, - regs.num_indices); - break; - case vk::IndexType::eUint32: - Vulkan::LiverpoolToVK::ConvertQuadToTriangleListIndices(data, index_ptr, - regs.num_indices); - break; - default: - UNREACHABLE_MSG("Unsupported QuadList index type {}", vk::to_string(index_type)); - break; - } - stream_buffer.Commit(); - - // Bind index buffer. - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, index_type); - return new_index_size / index_size; - } if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { UNREACHABLE(); } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index df89bcfc5..25ff88b9d 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -116,10 +116,10 @@ vk::PrimitiveTopology PrimitiveType(AmdGpu::PrimitiveType type) { return vk::PrimitiveTopology::eTriangleStripWithAdjacency; case AmdGpu::PrimitiveType::PatchPrimitive: return vk::PrimitiveTopology::ePatchList; - case AmdGpu::PrimitiveType::QuadList: case AmdGpu::PrimitiveType::Polygon: // Needs to generate index buffer on the fly. return vk::PrimitiveTopology::eTriangleList; + case AmdGpu::PrimitiveType::QuadList: case AmdGpu::PrimitiveType::RectList: return vk::PrimitiveTopology::ePatchList; default: diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 72bddc6b6..d5f8e693b 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -70,34 +70,6 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags); -static constexpr u16 NumVerticesPerQuad = 4; - -inline void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) { - u16* out_data = reinterpret_cast(out_ptr); - for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) { - *out_data++ = i; - *out_data++ = i + 1; - *out_data++ = i + 2; - *out_data++ = i; - *out_data++ = i + 2; - *out_data++ = i + 3; - } -} - -template -void ConvertQuadToTriangleListIndices(u8* out_ptr, const u8* in_ptr, u32 num_vertices) { - T* out_data = reinterpret_cast(out_ptr); - const T* in_data = reinterpret_cast(in_ptr); - for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) { - *out_data++ = in_data[i]; - *out_data++ = in_data[i + 1]; - *out_data++ = in_data[i + 2]; - *out_data++ = in_data[i]; - *out_data++ = in_data[i + 2]; - *out_data++ = in_data[i + 3]; - } -} - inline void EmitPolygonToTriangleListIndices(u8* out_ptr, u32 num_vertices) { u16* out_data = reinterpret_cast(out_ptr); for (u16 i = 1; i < num_vertices - 1; i++) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 18e41ed9d..9a4f51acc 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -107,10 +107,11 @@ GraphicsPipeline::GraphicsPipeline( key.primitive_restart_index == 0xFFFFFFFF, "Primitive restart index other than -1 is not supported yet"); const bool is_rect_list = key.prim_type == AmdGpu::PrimitiveType::RectList; + const bool is_quad_list = key.prim_type == AmdGpu::PrimitiveType::QuadList; const size_t num_fs_inputs = runtime_infos[u32(Shader::LogicalStage::Fragment)].fs_info.num_inputs; const vk::PipelineTessellationStateCreateInfo tessellation_state = { - .patchControlPoints = is_rect_list ? 3U : key.patch_control_points, + .patchControlPoints = is_rect_list ? 3U : (is_quad_list ? 4U : key.patch_control_points), }; const vk::PipelineRasterizationStateCreateInfo raster_state = { @@ -234,9 +235,9 @@ GraphicsPipeline::GraphicsPipeline( .module = modules[stage], .pName = "main", }); - } else if (is_rect_list) { - auto tcs = Shader::Backend::SPIRV::EmitAuxilaryTessShader(AuxShaderType::RectListTCS, - num_fs_inputs); + } else if (is_rect_list || is_quad_list) { + const auto type = is_quad_list ? AuxShaderType::QuadListTCS : AuxShaderType::RectListTCS; + auto tcs = Shader::Backend::SPIRV::EmitAuxilaryTessShader(type, num_fs_inputs); shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eTessellationControl, .module = CompileSPV(tcs, instance.GetDevice()), @@ -250,7 +251,7 @@ GraphicsPipeline::GraphicsPipeline( .module = modules[stage], .pName = "main", }); - } else if (is_rect_list) { + } else if (is_rect_list || is_quad_list) { auto tes = Shader::Backend::SPIRV::EmitAuxilaryTessShader(AuxShaderType::PassthroughTES, num_fs_inputs); shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{ diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 415654db0..5a1e3c27f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -18,7 +18,7 @@ class TextureCache; namespace Vulkan { -static constexpr u32 MaxShaderStages = 5; +static constexpr u32 MaxShaderStages = static_cast(Shader::LogicalStage::NumLogicalStages); static constexpr u32 MaxVertexBufferCount = 32; class Instance; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7d8829888..68ae06777 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -237,9 +237,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset), instance_offset); } else { - const u32 num_vertices = - regs.primitive_type == AmdGpu::PrimitiveType::RectList ? 3 : regs.num_indices; - cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), vertex_offset, + cmdbuf.draw(num_indices, regs.num_instances.NumInstances(), vertex_offset, instance_offset); } @@ -255,18 +253,14 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 } const auto& regs = liverpool->regs; - if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList || - regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { - // We use a generated index buffer to convert quad lists and polygons to triangles. Since it + if (regs.primitive_type == AmdGpu::PrimitiveType::Polygon) { + // We use a generated index buffer to convert polygons to triangles. Since it // changes type of the draw, arguments are not valid for this case. We need to run a // conversion pass to repack the indirect arguments buffer first. LOG_WARNING(Render_Vulkan, "Primitive type is not supported for indirect draw"); return; } - ASSERT_MSG(regs.primitive_type != AmdGpu::PrimitiveType::RectList, - "Unsupported primitive type for indirect draw"); - const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline(); if (!pipeline) { return;