diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3a5f77f29..b499ed3f9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -218,9 +218,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { if (info.has_image_query) { ctx.AddCapability(spv::Capability::ImageQuery); } - // if (program.info.stores_frag_depth) { - // ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); - // } + if (info.stores.Get(IR::Attribute::Depth)) { + ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); + } break; default: throw NotImplementedException("Stage {}", u32(program.info.stage)); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 0fd2c5822..fdc886d70 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -395,6 +395,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) { return spv::ImageFormat::Rgba8ui; } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format10_11_11 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { + return spv::ImageFormat::R11fG11fB10f; + } UNREACHABLE(); } diff --git a/src/shader_recompiler/frontend/instruction.h b/src/shader_recompiler/frontend/instruction.h index d1d10efb6..f83f43db5 100644 --- a/src/shader_recompiler/frontend/instruction.h +++ b/src/shader_recompiler/frontend/instruction.h @@ -76,11 +76,11 @@ struct SMRD { }; struct InstControlSOPK { - BitField<0, 16, u32> simm; + s16 simm; }; struct InstControlSOPP { - BitField<0, 16, u32> simm; + s16 simm; }; struct InstControlVOP3 { diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index c74fa9368..42709dc62 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -35,6 +35,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_CMP(ConditionOp::EQ, true, inst); case Opcode::S_CMP_EQ_U32: return S_CMP(ConditionOp::EQ, false, inst); + case Opcode::S_CMP_GE_U32: + return S_CMP(ConditionOp::GE, false, inst); case Opcode::S_OR_B64: return S_OR_B64(NegateMode::None, false, inst); case Opcode::S_NOR_B64: @@ -77,6 +79,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_ADD_U32(inst); case Opcode::S_ADDC_U32: return S_ADDC_U32(inst); + case Opcode::S_ADDK_I32: + return S_ADDK_I32(inst); + case Opcode::S_MULK_I32: + return S_MULK_I32(inst); case Opcode::S_SUB_U32: case Opcode::S_SUB_I32: return S_SUB_U32(inst); @@ -88,7 +94,7 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { } void Translator::S_MOVK(const GcnInst& inst) { - const auto simm16 = inst.control.sopk.simm.Value(); + const auto simm16 = inst.control.sopk.simm; if (simm16 & (1 << 15)) { // TODO: need to verify the case of imm sign extension UNREACHABLE(); @@ -96,6 +102,16 @@ void Translator::S_MOVK(const GcnInst& inst) { SetDst(inst.dst[0], ir.Imm32(simm16)); } +void Translator::S_ADDK_I32(const GcnInst& inst) { + const s32 simm16 = inst.control.sopk.simm; + SetDst(inst.dst[0], ir.IAdd(GetSrc(inst.dst[0]), ir.Imm32(simm16))); +} + +void Translator::S_MULK_I32(const GcnInst& inst) { + const s32 simm16 = inst.control.sopk.simm; + SetDst(inst.dst[0], ir.IMul(GetSrc(inst.dst[0]), ir.Imm32(simm16))); +} + void Translator::S_MOV(const GcnInst& inst) { SetDst(inst.dst[0], GetSrc(inst.src[0])); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 39f1543b9..23920b647 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -92,6 +92,8 @@ public: void S_SUB_U32(const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_ADDC_U32(const GcnInst& inst); + void S_MULK_I32(const GcnInst& inst); + void S_ADDK_I32(const GcnInst& inst); // Scalar Memory void S_LOAD_DWORD(int num_dwords, const GcnInst& inst); @@ -157,6 +159,7 @@ public: void V_BCNT_U32_B32(const GcnInst& inst); void V_COS_F32(const GcnInst& inst); void V_MAX3_F32(const GcnInst& inst); + void V_MAX3_U32(const GcnInst& inst); void V_CVT_I32_F32(const GcnInst& inst); void V_MIN_I32(const GcnInst& inst); void V_MUL_LO_U32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 4910c699b..6f417af9f 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -215,6 +215,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_RNDNE_F32(inst); case Opcode::V_MAX3_F32: return V_MAX3_F32(inst); + case Opcode::V_MAX3_U32: + return V_MAX3_U32(inst); case Opcode::V_TRUNC_F32: return V_TRUNC_F32(inst); case Opcode::V_CEIL_F32: @@ -764,6 +766,13 @@ void Translator::V_MAX3_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2))); } +void Translator::V_MAX3_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 src2{GetSrc(inst.src[2])}; + SetDst(inst.dst[0], ir.UMax(src0, ir.UMax(src1, src2))); +} + void Translator::V_CVT_I32_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0], true)}; SetDst(inst.dst[0], ir.ConvertFToS(32, src0)); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 7bbcc125a..1403fc8b2 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -94,7 +94,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::ScalarReg tsharp_reg{inst.src[2].code}; + const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; const auto flags = ImageResFlags(inst.control.mimg.dmask); const bool has_mips = flags.test(ImageResComponent::MipCount); const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code)); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 1871243fa..aab73652b 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - +#pragma clang optimize off #include #include #include @@ -435,8 +435,8 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, } } else { const u32 stride = buffer.GetStride(); - ASSERT_MSG(stride >= 4, "non-formatting load_buffer_* is not implemented for stride {}", - stride); + //ASSERT_MSG(stride >= 4, "non-formatting load_buffer_* is not implemented for stride {}", + // stride); } IR::U32 address = ir.Imm32(inst_info.inst_offset.Value()); @@ -477,18 +477,14 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip const auto result = IR::BreadthFirstSearch(&inst, pred); ASSERT_MSG(result, "Unable to find image sharp source"); const IR::Inst* producer = result.value(); - auto [tsharp_handle, ssharp_handle] = [&] -> std::pair { - if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) { - return std::make_pair(producer->Arg(0).InstRecursive(), - producer->Arg(1).InstRecursive()); - } - return std::make_pair(producer, nullptr); - }(); + const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2; + const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer; // Read image sharp. const auto tsharp = TrackSharp(tsharp_handle); const auto image = info.ReadUd(tsharp.sgpr_base, tsharp.dword_offset); const auto inst_info = inst.Flags(); + ASSERT(image.GetType() != AmdGpu::ImageType::Buffer); u32 image_binding = descriptors.Add(ImageResource{ .sgpr_base = tsharp.sgpr_base, .dword_offset = tsharp.dword_offset, @@ -499,15 +495,28 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip }); // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions - if (ssharp_handle) { - const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); - const auto ssharp = TrackSharp(ssharp_ud); - const u32 sampler_binding = descriptors.Add(SamplerResource{ - .sgpr_base = ssharp.sgpr_base, - .dword_offset = ssharp.dword_offset, - .associated_image = image_binding, - .disable_aniso = disable_aniso, - }); + if (has_sampler) { + u32 sampler_binding{}; + const IR::Value& handle = producer->Arg(1); + // Inline sampler resource. + if (handle.IsImmediate()) { + sampler_binding = descriptors.Add(SamplerResource{ + .sgpr_base = std::numeric_limits::max(), + .dword_offset = 0, + .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()}, + }); + } else { + // Normal sampler resource. + const auto ssharp_handle = handle.InstRecursive(); + const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle); + const auto ssharp = TrackSharp(ssharp_ud); + sampler_binding = descriptors.Add(SamplerResource{ + .sgpr_base = ssharp.sgpr_base, + .dword_offset = ssharp.dword_offset, + .associated_image = image_binding, + .disable_aniso = disable_aniso, + }); + } image_binding |= (sampler_binding << 16); } @@ -610,7 +619,7 @@ void ResourceTrackingPass(IR::Program& program) { // Iterate resource instructions and patch them after finding the sharp. auto& info = program.info; Descriptors descriptors{info.buffers, info.images, info.samplers}; - for (IR::Block* const block : program.post_order_blocks) { + for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { if (IsBufferInstruction(inst)) { PatchBufferInstruction(*block, inst, info, descriptors); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 8824e344b..5c72d7e6b 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -97,8 +97,11 @@ using ImageResourceList = boost::container::static_vector; struct SamplerResource { u32 sgpr_base; u32 dword_offset; + AmdGpu::Sampler inline_sampler{}; u32 associated_image : 4; u32 disable_aniso : 1; + + constexpr AmdGpu::Sampler GetSsharp(const Info& info) const noexcept; }; using SamplerResourceList = boost::container::static_vector; @@ -196,6 +199,10 @@ constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexc return inline_cbuf ? inline_cbuf : info.ReadUd(sgpr_base, dword_offset); } +constexpr AmdGpu::Sampler SamplerResource::GetSsharp(const Info& info) const noexcept { + return inline_sampler ? inline_sampler : info.ReadUd(sgpr_base, dword_offset); +} + } // namespace Shader template <> diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index df7eec826..af1963eec 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -403,9 +403,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanWaitVoLabel([&] { return wait_reg_mem->Test(); }); } while (!wait_reg_mem->Test()) { + mapped_queues[GfxQueueId].cs_state = regs.cs_program; TracyFiberLeave; co_yield {}; TracyFiberEnter(dcb_task_name); + regs.cs_program = mapped_queues[GfxQueueId].cs_state; } break; } @@ -506,9 +508,11 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { const auto* wait_reg_mem = reinterpret_cast(header); ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me); while (!wait_reg_mem->Test()) { + mapped_queues[vqid].cs_state = regs.cs_program; TracyFiberLeave; co_yield {}; TracyFiberEnter(acb_task_name); + regs.cs_program = mapped_queues[vqid].cs_state; } break; } @@ -529,7 +533,6 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { } void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { - static constexpr u32 GfxQueueId = 0u; auto& queue = mapped_queues[GfxQueueId]; auto task = ProcessGraphics(dcb, ccb); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index db2ee91c3..b0285809c 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -36,6 +36,7 @@ namespace AmdGpu { [[maybe_unused]] std::array CONCAT2(pad, __LINE__) struct Liverpool { + static constexpr u32 GfxQueueId = 0u; static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software static constexpr u32 NumQueuesPerPipe = 8u; @@ -1061,6 +1062,7 @@ private: struct GpuQueue { std::mutex m_access{}; std::queue submits{}; + ComputeProgram cs_state{}; }; std::array mapped_queues{}; diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index d9e090888..7d3465af1 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -324,6 +324,7 @@ enum class BorderColor : u64 { // Table 8.12 Sampler Resource Definition struct Sampler { union { + u64 raw0; BitField<0, 3, ClampMode> clamp_x; BitField<3, 3, ClampMode> clamp_y; BitField<6, 3, ClampMode> clamp_z; @@ -343,6 +344,7 @@ struct Sampler { BitField<60, 4, u64> perf_z; }; union { + u64 raw1; BitField<0, 14, u64> lod_bias; BitField<14, 6, u64> lod_bias_sec; BitField<20, 2, Filter> xy_mag_filter; @@ -357,6 +359,10 @@ struct Sampler { BitField<62, 2, BorderColor> border_color_type; }; + operator bool() const noexcept { + return raw0 != 0 || raw1 != 0; + } + float LodBias() const noexcept { return static_cast(static_cast((lod_bias.Value() ^ 0x2000u) - 0x2000u)) / 256.0f; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index fc7943e62..21ade4010 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -297,6 +297,7 @@ std::span GetAllFormats() { vk::Format::eBc3UnormBlock, vk::Format::eBc4UnormBlock, vk::Format::eBc5UnormBlock, + vk::Format::eBc5SnormBlock, vk::Format::eBc7SrgbBlock, vk::Format::eBc7UnormBlock, vk::Format::eD16Unorm, @@ -308,6 +309,7 @@ std::span GetAllFormats() { vk::Format::eR8G8B8A8Srgb, vk::Format::eR8G8B8A8Uint, vk::Format::eR8G8B8A8Unorm, + vk::Format::eR8G8B8A8Snorm, vk::Format::eR8G8B8A8Uscaled, vk::Format::eR8G8Snorm, vk::Format::eR8G8Uint, @@ -384,6 +386,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eBc5UnormBlock; } + if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Snorm) { + return vk::Format::eBc5SnormBlock; + } if (data_format == AmdGpu::DataFormat::Format16_16_16_16 && num_format == AmdGpu::NumberFormat::Sint) { return vk::Format::eR16G16B16A16Sint; @@ -518,6 +523,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu num_format == AmdGpu::NumberFormat::SnormNz) { return vk::Format::eR16G16B16A16Snorm; } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Snorm) { + return vk::Format::eR8G8B8A8Snorm; + } UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 51bb7f831..34f1e9cc4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -148,7 +148,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s } } for (const auto& sampler : info.samplers) { - const auto ssharp = info.ReadUd(sampler.sgpr_base, sampler.dword_offset); + const auto ssharp = sampler.GetSsharp(info); const auto vk_sampler = texture_cache.GetSampler(ssharp); image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index eb5522688..7b00a9111 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -386,7 +386,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& } } for (const auto& sampler : stage.samplers) { - auto ssharp = stage.ReadUd(sampler.sgpr_base, sampler.dword_offset); + auto ssharp = sampler.GetSsharp(stage); if (sampler.disable_aniso) { const auto& tsharp = tsharps[sampler.associated_image]; if (tsharp.base_level == 0 && tsharp.last_level == 0) { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 09a9180e1..0355aea72 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -261,6 +261,7 @@ bool Instance::CreateDevice() { .shaderStorageImageExtendedFormats = features.shaderStorageImageExtendedFormats, .shaderStorageImageMultisample = features.shaderStorageImageMultisample, .shaderClipDistance = features.shaderClipDistance, + .shaderInt64 = features.shaderInt64, .shaderInt16 = features.shaderInt16, }, }, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 67994485a..afd589d51 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -302,6 +302,14 @@ std::unique_ptr PipelineCache::CreateComputePipeline() { block_pool.ReleaseContents(); inst_pool.ReleaseContents(); + if (compute_key == 0xa71733ca || compute_key == 0xa55ad01d) { + return nullptr; + } + + if (compute_key == 4248155022) { + printf("test\n"); + } + // Recompile shader to IR. try { LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a3be8352c..ff5e97d5b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, liverpool{liverpool_}, memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool}, - vertex_index_buffer{instance, scheduler, VertexIndexFlags, 1_GB, BufferType::Upload} { + vertex_index_buffer{instance, scheduler, VertexIndexFlags, 2_GB, BufferType::Upload} { if (!Config::nullGpu()) { liverpool->BindRasterizer(this); } @@ -91,6 +91,7 @@ void Rasterizer::DispatchDirect() { UNREACHABLE(); } + scheduler.EndRendering(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle()); cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 7e89800e2..fb64285f1 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -54,9 +54,7 @@ void Scheduler::EndRendering() { for (size_t i = 0; i < render_state.num_color_attachments; ++i) { barriers.push_back(vk::ImageMemoryBarrier{ .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, - .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite | - vk::AccessFlagBits::eColorAttachmentRead | - vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, .oldLayout = vk::ImageLayout::eColorAttachmentOptimal, .newLayout = vk::ImageLayout::eColorAttachmentOptimal, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 116f7896d..444472411 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -226,7 +226,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) { auto& watch = previous_watches[wait_cursor]; wait_bound = watch.upper_bound; - scheduler.Wait(watch.tick); + //scheduler.Wait(watch.tick); ++wait_cursor; } } diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 6651282a3..0ceb264f1 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -48,9 +48,9 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) { } bool IsIdentityMapping(u32 dst_sel, u32 num_components) { - return (num_components == 1 && (dst_sel == 0b100 || dst_sel == 0b001000000100)) || - (num_components == 2 && (dst_sel == 0b101100 || dst_sel == 0b001000101100)) || - (num_components == 3 && (dst_sel == 0b110101100 || dst_sel == 0b001110101100)) || + return (num_components == 1 && dst_sel == 0b100) || + (num_components == 2 && dst_sel == 0b101100) || + (num_components == 3 && dst_sel == 0b110101100) || (num_components == 4 && dst_sel == 0b111110101100); }