From f31c92ffd1f7eca6046e59486ad9de413c99774f Mon Sep 17 00:00:00 2001 From: offtkp Date: Wed, 4 Sep 2024 12:31:12 +0300 Subject: [PATCH 01/11] Get rid of unnecessary jump --- src/core/cpu_patches.cpp | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index f31ff18cb..91b3bcd40 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -315,14 +315,12 @@ static void GenerateBLSI(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat SaveRegisters(c, {scratch}); // BLSI sets CF to zero if source is zero, otherwise it sets CF to one. - Xbyak::Label set_carry, clear_carry, end; + Xbyak::Label clear_carry, end; c.mov(scratch, *src); c.neg(scratch); // NEG, like BLSI, clears CF if the source is zero and sets it otherwise - c.jc(set_carry); - c.jmp(clear_carry); + c.jnc(clear_carry); - c.L(set_carry); c.and_(scratch, *src); c.stc(); // setting/clearing carry needs to happen after the AND because that clears CF c.jmp(end); @@ -345,15 +343,13 @@ static void GenerateBLSMSK(const ZydisDecodedOperand* operands, Xbyak::CodeGener SaveRegisters(c, {scratch}); - Xbyak::Label set_carry, clear_carry, end; + Xbyak::Label clear_carry, end; // BLSMSK sets CF to zero if source is NOT zero, otherwise it sets CF to one. c.mov(scratch, *src); c.test(scratch, scratch); - c.jz(set_carry); - c.jmp(clear_carry); + c.jnz(clear_carry); - c.L(set_carry); c.dec(scratch); c.xor_(scratch, *src); c.stc(); @@ -378,15 +374,13 @@ static void GenerateBLSR(const ZydisDecodedOperand* operands, Xbyak::CodeGenerat SaveRegisters(c, {scratch}); - Xbyak::Label set_carry, clear_carry, end; + Xbyak::Label clear_carry, end; // BLSR sets CF to zero if source is NOT zero, otherwise it sets CF to one. c.mov(scratch, *src); c.test(scratch, scratch); - c.jz(set_carry); - c.jmp(clear_carry); + c.jnz(clear_carry); - c.L(set_carry); c.dec(scratch); c.and_(scratch, *src); c.stc(); From bb29224daf288261869680ee8b48cd72a0d8dc9d Mon Sep 17 00:00:00 2001 From: baggins183 Date: Fri, 6 Sep 2024 13:47:47 -0700 Subject: [PATCH 02/11] Implement V_MOVREL variants (#745) * shader_recompiler: Implement V_MOVRELS_B32, V_MOVRELD_B32, V_MOVRELSD_B32 Generates a ton of OpSelects to hardcode reading or writing from each possible vgpr depending on the value of m0 Future work is to do range analysis to put an upper bound on m0 and check fewer registers. * fix runtime info after rebase --- .../backend/spirv/emit_spirv.cpp | 8 +++ .../backend/spirv/emit_spirv_instructions.h | 2 + .../frontend/translate/translate.cpp | 8 +-- .../frontend/translate/translate.h | 7 ++- .../frontend/translate/vector_alu.cpp | 55 +++++++++++++++++++ src/shader_recompiler/ir/ir_emitter.cpp | 8 +++ src/shader_recompiler/ir/ir_emitter.h | 2 + src/shader_recompiler/ir/opcodes.inc | 2 + .../ir/passes/ssa_rewrite_pass.cpp | 20 ++++++- src/shader_recompiler/runtime_info.h | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 3 + 11 files changed, 110 insertions(+), 6 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index c681be97c..b0298cbb0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -327,6 +327,10 @@ void EmitGetVccHi(EmitContext& ctx) { UNREACHABLE_MSG("Unreachable instruction"); } +void EmitGetM0(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + void EmitSetScc(EmitContext& ctx) { UNREACHABLE_MSG("Unreachable instruction"); } @@ -351,4 +355,8 @@ void EmitSetVccHi(EmitContext& ctx) { UNREACHABLE_MSG("Unreachable instruction"); } +void EmitSetM0(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index ce4d3f137..0cd59175d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -36,12 +36,14 @@ void EmitGetVcc(EmitContext& ctx); void EmitGetSccLo(EmitContext& ctx); void EmitGetVccLo(EmitContext& ctx); void EmitGetVccHi(EmitContext& ctx); +void EmitGetM0(EmitContext& ctx); void EmitSetScc(EmitContext& ctx); void EmitSetExec(EmitContext& ctx); void EmitSetVcc(EmitContext& ctx); void EmitSetSccLo(EmitContext& ctx); void EmitSetVccLo(EmitContext& ctx); void EmitSetVccHi(EmitContext& ctx); +void EmitSetM0(EmitContext& ctx); void EmitFPCmpClass32(EmitContext& ctx); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index b33746c7b..4e0c110c2 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -153,10 +153,11 @@ T Translator::GetSrc(const InstOperand& operand) { break; case OperandField::M0: if constexpr (is_float) { - UNREACHABLE(); + value = ir.BitCast(ir.GetM0()); } else { - return m0_value; + value = ir.GetM0(); } + break; default: UNREACHABLE(); } @@ -296,8 +297,7 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { case OperandField::VccHi: return ir.SetVccHi(result); case OperandField::M0: - m0_value = result; - break; + return ir.SetM0(result); default: UNREACHABLE(); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 0c1f3a587..d6887818d 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -192,6 +192,9 @@ public: void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst); void V_BFM_B32(const GcnInst& inst); void V_FFBH_U32(const GcnInst& inst); + void V_MOVRELS_B32(const GcnInst& inst); + void V_MOVRELD_B32(const GcnInst& inst); + void V_MOVRELSD_B32(const GcnInst& inst); // Vector Memory void BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst); @@ -233,6 +236,9 @@ private: void SetDst(const InstOperand& operand, const IR::U32F32& value); void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw); + IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0); + void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0); + void LogMissingOpcode(const GcnInst& inst); private: @@ -240,7 +246,6 @@ private: Info& info; const RuntimeInfo& runtime_info; const Profile& profile; - IR::U32 m0_value; bool opcode_missing = false; }; diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index a07e70785..2024b7067 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "shader_recompiler/frontend/opcodes.h" #include "shader_recompiler/frontend/translate/translate.h" namespace Shader::Gcn { @@ -309,6 +310,12 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_MBCNT_U32_B32(true, inst); case Opcode::V_MBCNT_HI_U32_B32: return V_MBCNT_U32_B32(false, inst); + case Opcode::V_MOVRELS_B32: + return V_MOVRELS_B32(inst); + case Opcode::V_MOVRELD_B32: + return V_MOVRELD_B32(inst); + case Opcode::V_MOVRELSD_B32: + return V_MOVRELSD_B32(inst); case Opcode::V_NOP: return; @@ -990,4 +997,52 @@ void Translator::V_FFBH_U32(const GcnInst& inst) { SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))}); } +// TODO: add range analysis pass to hopefully put an upper bound on m0, and only select one of +// [src_vgprno, src_vgprno + max_m0]. Same for dst regs we may write back to + +IR::U32 Translator::VMovRelSHelper(u32 src_vgprno, const IR::U32 m0) { + // Read from VGPR0 by default when src_vgprno + m0 > num_allocated_vgprs + IR::U32 src_val = ir.GetVectorReg(IR::VectorReg::V0); + for (u32 i = src_vgprno; i < runtime_info.num_allocated_vgprs; i++) { + const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - src_vgprno)); + src_val = + IR::U32{ir.Select(cond, ir.GetVectorReg(IR::VectorReg::V0 + i), src_val)}; + } + return src_val; +} + +void Translator::VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0) { + for (u32 i = dst_vgprno; i < runtime_info.num_allocated_vgprs; i++) { + const IR::U1 cond = ir.IEqual(m0, ir.Imm32(i - dst_vgprno)); + const IR::U32 dst_val = + IR::U32{ir.Select(cond, src_val, ir.GetVectorReg(IR::VectorReg::V0 + i))}; + ir.SetVectorReg(IR::VectorReg::V0 + i, dst_val); + } +} + +void Translator::V_MOVRELS_B32(const GcnInst& inst) { + u32 src_vgprno = inst.src[0].code - static_cast(IR::VectorReg::V0); + const IR::U32 m0 = ir.GetM0(); + + const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0); + SetDst(inst.dst[0], src_val); +} + +void Translator::V_MOVRELD_B32(const GcnInst& inst) { + const IR::U32 src_val{GetSrc(inst.src[0])}; + u32 dst_vgprno = inst.dst[0].code - static_cast(IR::VectorReg::V0); + IR::U32 m0 = ir.GetM0(); + + VMovRelDHelper(dst_vgprno, src_val, m0); +} + +void Translator::V_MOVRELSD_B32(const GcnInst& inst) { + u32 src_vgprno = inst.src[0].code - static_cast(IR::VectorReg::V0); + u32 dst_vgprno = inst.dst[0].code - static_cast(IR::VectorReg::V0); + IR::U32 m0 = ir.GetM0(); + + const IR::U32 src_val = VMovRelSHelper(src_vgprno, m0); + VMovRelDHelper(dst_vgprno, src_val, m0); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 473ae4f66..2be0c1ac6 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -217,6 +217,10 @@ U32 IREmitter::GetVccHi() { return Inst(Opcode::GetVccHi); } +U32 IREmitter::GetM0() { + return Inst(Opcode::GetM0); +} + void IREmitter::SetScc(const U1& value) { Inst(Opcode::SetScc, value); } @@ -241,6 +245,10 @@ void IREmitter::SetVccHi(const U32& value) { Inst(Opcode::SetVccHi, value); } +void IREmitter::SetM0(const U32& value) { + Inst(Opcode::SetM0, value); +} + F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) { return Inst(Opcode::GetAttribute, attribute, Imm32(comp)); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index de8fe450d..22d524fb3 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -67,12 +67,14 @@ public: [[nodiscard]] U1 GetVcc(); [[nodiscard]] U32 GetVccLo(); [[nodiscard]] U32 GetVccHi(); + [[nodiscard]] U32 GetM0(); void SetScc(const U1& value); void SetExec(const U1& value); void SetVcc(const U1& value); void SetSccLo(const U32& value); void SetVccLo(const U32& value); void SetVccHi(const U32& value); + void SetM0(const U32& value); [[nodiscard]] U1 Condition(IR::Condition cond); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 40dcfa441..4df8d13d1 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -60,12 +60,14 @@ OPCODE(GetExec, U1, Void, OPCODE(GetVcc, U1, Void, ) OPCODE(GetVccLo, U32, Void, ) OPCODE(GetVccHi, U32, Void, ) +OPCODE(GetM0, U32, Void, ) OPCODE(SetScc, Void, U1, ) OPCODE(SetExec, Void, U1, ) OPCODE(SetVcc, Void, U1, ) OPCODE(SetSccLo, Void, U32, ) OPCODE(SetVccLo, Void, U32, ) OPCODE(SetVccHi, Void, U32, ) +OPCODE(SetM0, Void, U32, ) // Undefined OPCODE(UndefU1, U1, ) diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index 9edb157db..ea27c64f7 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -33,6 +33,7 @@ struct ExecFlagTag : FlagTag {}; struct VccFlagTag : FlagTag {}; struct VccLoTag : FlagTag {}; struct VccHiTag : FlagTag {}; +struct M0Tag : FlagTag {}; struct GotoVariable : FlagTag { GotoVariable() = default; @@ -44,7 +45,7 @@ struct GotoVariable : FlagTag { }; using Variant = std::variant; + VccFlagTag, VccLoTag, VccHiTag, M0Tag>; using ValueMap = std::unordered_map; struct DefTable { @@ -103,6 +104,12 @@ struct DefTable { void SetDef(IR::Block* block, VccFlagTag, const IR::Value& value) { vcc_flag.insert_or_assign(block, value); } + const IR::Value& Def(IR::Block* block, M0Tag) { + return m0_flag[block]; + } + void SetDef(IR::Block* block, M0Tag, const IR::Value& value) { + m0_flag.insert_or_assign(block, value); + } std::unordered_map goto_vars; ValueMap scc_flag; @@ -111,6 +118,7 @@ struct DefTable { ValueMap scc_lo_flag; ValueMap vcc_lo_flag; ValueMap vcc_hi_flag; + ValueMap m0_flag; }; IR::Opcode UndefOpcode(IR::ScalarReg) noexcept { @@ -129,6 +137,10 @@ IR::Opcode UndefOpcode(const VccHiTag) noexcept { return IR::Opcode::UndefU32; } +IR::Opcode UndefOpcode(const M0Tag) noexcept { + return IR::Opcode::UndefU32; +} + IR::Opcode UndefOpcode(const FlagTag) noexcept { return IR::Opcode::UndefU1; } @@ -330,6 +342,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetVccHi: pass.WriteVariable(VccHiTag{}, block, inst.Arg(0)); break; + case IR::Opcode::SetM0: + pass.WriteVariable(M0Tag{}, block, inst.Arg(0)); + break; case IR::Opcode::GetThreadBitScalarReg: case IR::Opcode::GetScalarRegister: { const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; @@ -362,6 +377,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetVccHi: inst.ReplaceUsesWith(pass.ReadVariable(VccHiTag{}, block)); break; + case IR::Opcode::GetM0: + inst.ReplaceUsesWith(pass.ReadVariable(M0Tag{}, block)); + break; default: break; } diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 776fd90a6..1bb065544 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -107,6 +107,7 @@ struct RuntimeInfo { Stage stage; u32 num_user_data; u32 num_input_vgprs; + u32 num_allocated_vgprs; VertexRuntimeInfo vs_info; FragmentRuntimeInfo fs_info; ComputeRuntimeInfo cs_info; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b5435af1f..4419b0f81 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -76,6 +76,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { case Shader::Stage::Vertex: { info.num_user_data = regs.vs_program.settings.num_user_regs; info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; + info.num_allocated_vgprs = regs.vs_program.settings.num_vgprs * 4; GatherVertexOutputs(info.vs_info, regs.vs_output_control); info.vs_info.emulate_depth_negative_one_to_one = !instance.IsDepthClipControlSupported() && @@ -84,6 +85,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { } case Shader::Stage::Fragment: { info.num_user_data = regs.ps_program.settings.num_user_regs; + info.num_allocated_vgprs = regs.ps_program.settings.num_vgprs * 4; std::ranges::transform(graphics_key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(), [](Liverpool::ColorBuffer::SwapMode mode) { return static_cast(mode); @@ -102,6 +104,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) { case Shader::Stage::Compute: { const auto& cs_pgm = regs.cs_program; info.num_user_data = cs_pgm.settings.num_user_regs; + info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4; info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, cs_pgm.num_thread_z.full}; info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), From 09ce12a868bb73d48b38fb240951dbf4e06633ed Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Fri, 6 Sep 2024 15:51:20 -0500 Subject: [PATCH 03/11] shader_recompiler: Add more opcodes (#802) * Implement some missing shader opcodes Implements TBUFFER_STORE_FORMAT_XYZW, IMAGE_SAMPLE_CD, and IMAGE_GATHER4_C_LZ. These are seen in https://github.com/shadps4-emu/shadPS4/issues/496. * Implement IMAGE_STORE_MIP Not sure if this is the right way to do this, let me know if this needs changing. * Revert "Implement IMAGE_STORE_MIP" This reverts commit cff78b5924c20397995f590df3553dc8f7a45a7b. --- src/shader_recompiler/frontend/translate/vector_memory.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 73530dade..04b9b50dd 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -18,9 +18,11 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::IMAGE_SAMPLE_B: case Opcode::IMAGE_SAMPLE_C_LZ_O: case Opcode::IMAGE_SAMPLE_D: + case Opcode::IMAGE_SAMPLE_CD: return IMAGE_SAMPLE(inst); - case Opcode::IMAGE_GATHER4_C: case Opcode::IMAGE_GATHER4_LZ: + case Opcode::IMAGE_GATHER4_C: + case Opcode::IMAGE_GATHER4_C_LZ: case Opcode::IMAGE_GATHER4_LZ_O: return IMAGE_GATHER(inst); case Opcode::IMAGE_ATOMIC_ADD: @@ -98,6 +100,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return BUFFER_STORE(2, true, inst); case Opcode::TBUFFER_STORE_FORMAT_XYZ: return BUFFER_STORE(3, true, inst); + case Opcode::TBUFFER_STORE_FORMAT_XYZW: + return BUFFER_STORE(4, true, inst); case Opcode::BUFFER_STORE_DWORD: return BUFFER_STORE(1, false, inst); From ffd0f7b53ac40dc8cbb9ebad9e6f29ea71896f35 Mon Sep 17 00:00:00 2001 From: "Daniel R." <47796739+polybiusproxy@users.noreply.github.com> Date: Fri, 6 Sep 2024 23:01:00 +0200 Subject: [PATCH 04/11] core/libraries/save_data: Implement wildcard searches on `sceSaveDataDirNameSearch` (#817) * libraries/save_data: Implement wildcards and params * clang-format --- .../libraries/kernel/thread_management.cpp | 4 +- src/core/libraries/save_data/savedata.cpp | 67 +++++++++++++++---- src/sdl_window.cpp | 11 +-- 3 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 919afcb47..8f97ed879 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -295,7 +295,7 @@ ScePthread PS4_SYSV_ABI scePthreadSelf() { int PS4_SYSV_ABI scePthreadAttrSetaffinity(ScePthreadAttr* pattr, const /*SceKernelCpumask*/ u64 mask) { - LOG_INFO(Kernel_Pthread, "called"); + LOG_DEBUG(Kernel_Pthread, "called"); if (pattr == nullptr || *pattr == nullptr) { return SCE_KERNEL_ERROR_EINVAL; @@ -387,7 +387,7 @@ int PS4_SYSV_ABI posix_pthread_attr_setstacksize(ScePthreadAttr* attr, size_t st } int PS4_SYSV_ABI scePthreadSetaffinity(ScePthread thread, const /*SceKernelCpumask*/ u64 mask) { - LOG_INFO(Kernel_Pthread, "called"); + LOG_DEBUG(Kernel_Pthread, "called"); if (thread == nullptr) { return SCE_KERNEL_ERROR_ESRCH; diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp index 959a75705..779c922e6 100644 --- a/src/core/libraries/save_data/savedata.cpp +++ b/src/core/libraries/save_data/savedata.cpp @@ -179,15 +179,21 @@ int PS4_SYSV_ABI sceSaveDataDeleteUser() { int PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond* cond, OrbisSaveDataDirNameSearchResult* result) { - if (cond == nullptr) + if (cond == nullptr || result == nullptr) return ORBIS_SAVE_DATA_ERROR_PARAMETER; - LOG_INFO(Lib_SaveData, "called"); + LOG_INFO(Lib_SaveData, "Number of directories = {}", result->dirNamesNum); const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / std::to_string(cond->userId) / game_serial; if (!mount_dir.empty() && std::filesystem::exists(mount_dir)) { - if (cond->dirName == nullptr || std::string_view(cond->dirName->data) - .empty()) { // look for all dirs if no dir is provided. - for (int i = 0; const auto& entry : std::filesystem::directory_iterator(mount_dir)) { + int maxDirNum = result->dirNamesNum; // Games set a maximum of directories to search for + int i = 0; + + if (cond->dirName == nullptr || std::string_view(cond->dirName->data).empty()) { + // Look for all dirs if no dir is provided. + for (const auto& entry : std::filesystem::directory_iterator(mount_dir)) { + if (i >= maxDirNum) + break; + if (std::filesystem::is_directory(entry.path()) && entry.path().filename().string() != "sdmemory") { // sceSaveDataDirNameSearch does not search for dataMemory1/2 dirs. @@ -199,13 +205,50 @@ int PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond* result->setNum = i; } } - } else { // Need a game to test. - LOG_ERROR(Lib_SaveData, "Check Me. sceSaveDataDirNameSearch: dirName = {}", - cond->dirName->data); - strncpy(result->dirNames[0].data, cond->dirName->data, 32); - result->hitNum = 1; - result->dirNamesNum = 1; - result->setNum = 1; + } else { + // Game checks for a specific directory. + LOG_INFO(Lib_SaveData, "dirName = {}", cond->dirName->data); + + // Games can pass '%' as a wildcard + // e.g. `SAVELIST%` searches for all folders with names starting with `SAVELIST` + std::string baseName(cond->dirName->data); + u64 wildcardPos = baseName.find('%'); + if (wildcardPos != std::string::npos) { + baseName = baseName.substr(0, wildcardPos); + } + + for (const auto& entry : std::filesystem::directory_iterator(mount_dir)) { + if (i >= maxDirNum) + break; + + if (std::filesystem::is_directory(entry.path())) { + std::string dirName = entry.path().filename().string(); + + if (wildcardPos != std::string::npos) { + if (dirName.compare(0, baseName.size(), baseName) != 0) { + continue; + } + } else if (wildcardPos == std::string::npos && dirName != cond->dirName->data) { + continue; + } + + strncpy(result->dirNames[i].data, cond->dirName->data, 32); + + i++; + result->hitNum = i; + result->dirNamesNum = i; + result->setNum = i; + } + } + } + + if (result->params != nullptr) { + Common::FS::IOFile file(mount_dir / cond->dirName->data / "param.txt", + Common::FS::FileAccessMode::Read); + if (file.IsOpen()) { + file.ReadRaw((void*)result->params, sizeof(OrbisSaveDataParam)); + file.Close(); + } } } else { result->hitNum = 0; diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index b83afd299..31460d07c 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -194,11 +194,6 @@ void WindowSDL::onKeyPress(const SDL_Event* event) { ax = Input::GetAxis(-0x80, 0x80, axisvalue); break; case SDLK_S: - if (event->key.mod == SDL_KMOD_LCTRL) { - // Trigger rdoc capture - VideoCore::TriggerCapture(); - break; - } axis = Input::Axis::LeftY; if (event->type == SDL_EVENT_KEY_DOWN) { axisvalue += 127; @@ -287,6 +282,12 @@ void WindowSDL::onKeyPress(const SDL_Event* event) { } } break; + case SDLK_F12: + if (event->type == SDL_EVENT_KEY_DOWN) { + // Trigger rdoc capture + VideoCore::TriggerCapture(); + } + break; default: break; } From 649527a235609217c0216365220956e73a0a3a60 Mon Sep 17 00:00:00 2001 From: CrazyBloo Date: Fri, 6 Sep 2024 17:08:32 -0400 Subject: [PATCH 05/11] libSceRtc HLE (#697) * SetTick + GetTick, adding functions, checkvalid * format * more functions * format * implement lizardy's changes * fix linux build * various formatting improvements and fixes * fix sceRtcGetCurrentClockLocalTime, fixes sceRtcGetCurrentClockLocalTime using lizardy's suggestions. also implements various formatting improvements and logging changes * fix mac and linux builds, const for UNIX_EPOCH * fix ConvertUtcToLocalTime,RtcConvertLocalTimeToUtc * format rfc2822, format rfc3339 * format * GetDosTime, GetTime_t, GetWin32FileTime +various formatting improvements * sceRtcParseRFC3339, sceRtcParseDateTime --- src/core/libraries/kernel/libkernel.h | 2 + src/core/libraries/kernel/time_management.h | 7 +- src/core/libraries/rtc/rtc.cpp | 1130 ++++++++++++++++--- src/core/libraries/rtc/rtc.h | 112 +- src/core/libraries/rtc/rtc_error.h | 17 +- 5 files changed, 1095 insertions(+), 173 deletions(-) diff --git a/src/core/libraries/kernel/libkernel.h b/src/core/libraries/kernel/libkernel.h index c28a548ff..73705cdc2 100644 --- a/src/core/libraries/kernel/libkernel.h +++ b/src/core/libraries/kernel/libkernel.h @@ -33,6 +33,8 @@ typedef struct { } OrbisKernelUuid; int* PS4_SYSV_ABI __Error(); +int PS4_SYSV_ABI sceKernelConvertUtcToLocaltime(time_t time, time_t* local_time, + struct OrbisTimesec* st, unsigned long* dst_sec); int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver); void LibKernel_Register(Core::Loader::SymbolsResolver* sym); diff --git a/src/core/libraries/kernel/time_management.h b/src/core/libraries/kernel/time_management.h index a28f8c133..a28e6e558 100644 --- a/src/core/libraries/kernel/time_management.h +++ b/src/core/libraries/kernel/time_management.h @@ -3,6 +3,8 @@ #pragma once +#include + #include "common/types.h" namespace Core::Loader { @@ -50,7 +52,10 @@ u64 PS4_SYSV_ABI sceKernelGetProcessTime(); u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounter(); u64 PS4_SYSV_ABI sceKernelGetProcessTimeCounterFrequency(); u64 PS4_SYSV_ABI sceKernelReadTsc(); - +int PS4_SYSV_ABI sceKernelClockGettime(s32 clock_id, OrbisKernelTimespec* tp); +s32 PS4_SYSV_ABI sceKernelGettimezone(OrbisKernelTimezone* tz); +int PS4_SYSV_ABI sceKernelConvertLocaltimeToUtc(time_t param_1, int64_t param_2, time_t* seconds, + OrbisKernelTimezone* timezone, int* dst_seconds); void timeSymbolsRegister(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::Kernel diff --git a/src/core/libraries/rtc/rtc.cpp b/src/core/libraries/rtc/rtc.cpp index 387a8558b..7a46a1e31 100644 --- a/src/core/libraries/rtc/rtc.cpp +++ b/src/core/libraries/rtc/rtc.cpp @@ -5,156 +5,827 @@ #include "common/logging/log.h" #include "core/libraries/error_codes.h" +#include "core/libraries/kernel/libkernel.h" +#include "core/libraries/kernel/time_management.h" #include "core/libraries/libs.h" #include "rtc.h" #include "rtc_error.h" namespace Libraries::Rtc { -int PS4_SYSV_ABI sceRtcCheckValid() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcCheckValid(OrbisRtcDateTime* pTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + if (pTime->year == 0 || pTime->year > 9999) + return ORBIS_RTC_ERROR_INVALID_YEAR; + + if (pTime->month == 0 || pTime->month > 12) + return ORBIS_RTC_ERROR_INVALID_MONTH; + + if (pTime->day == 0) + return ORBIS_RTC_ERROR_INVALID_DAY; + + using namespace std::chrono; + year chronoYear = year(pTime->year); + month chronoMonth = month(pTime->month); + int lastDay = + static_cast(unsigned(year_month_day_last{chronoYear / chronoMonth / last}.day())); + + if (pTime->day > lastDay) + return ORBIS_RTC_ERROR_INVALID_DAY; + + if (pTime->hour >= 24) + return ORBIS_RTC_ERROR_INVALID_HOUR; + + if (pTime->minute >= 60) + return ORBIS_RTC_ERROR_INVALID_MINUTE; + + if (pTime->second >= 60) + return ORBIS_RTC_ERROR_INVALID_SECOND; + + if (pTime->microsecond >= 1000000) + return ORBIS_RTC_ERROR_INVALID_MICROSECOND; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcCompareTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcCompareTick(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + if (pTick1->tick <= pTick2->tick) + return 1; + else + return 0; + + return ORBIS_FAIL; } -int PS4_SYSV_ABI sceRtcConvertLocalTimeToUtc() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcConvertLocalTimeToUtc(OrbisRtcTick* pTickLocal, OrbisRtcTick* pTickUtc) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTickLocal == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + time_t seconds; + Kernel::OrbisKernelTimezone timezone; + + int convertValue = Kernel::sceKernelConvertLocaltimeToUtc( + (pTickLocal->tick - UNIX_EPOCH_TICKS) / 1000000, 0xffffffff, &seconds, &timezone, 0); + + if (convertValue >= 0) { + convertValue = sceRtcTickAddMinutes( + pTickUtc, pTickLocal, -(((timezone.tz_dsttime * 60) - timezone.tz_minuteswest))); + } + + return convertValue; } -int PS4_SYSV_ABI sceRtcConvertUtcToLocalTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcConvertUtcToLocalTime(OrbisRtcTick* pTickUtc, OrbisRtcTick* pTickLocal) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTickUtc == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + Kernel::OrbisKernelTimezone timeZone; + int returnValue = Kernel::sceKernelGettimezone(&timeZone); + + sceRtcTickAddMinutes(pTickLocal, pTickUtc, + -(timeZone.tz_minuteswest - (timeZone.tz_dsttime * 60))); + + return 0; } int PS4_SYSV_ABI sceRtcEnd() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; + return SCE_OK; } -int PS4_SYSV_ABI sceRtcFormatRFC2822() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcFormatRFC2822(char* pszDateTime, const OrbisRtcTick* pTickUtc, + int iTimeZoneMinutes) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pszDateTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + OrbisRtcTick formatTick; + + if (pTickUtc == nullptr) { + sceRtcGetCurrentTick(&formatTick); + } else { + formatTick.tick = pTickUtc->tick; + } + + sceRtcTickAddMinutes(&formatTick, &formatTick, iTimeZoneMinutes); + + OrbisRtcDateTime formatTime; + sceRtcSetTick(&formatTime, &formatTick); + + int validTime = sceRtcCheckValid(&formatTime); + + std::string formattedString; + + if (validTime >= 0) { + int weekDay = sceRtcGetDayOfWeek(formatTime.year, formatTime.month, formatTime.day); + switch (weekDay) { + case 0: + formattedString = "Sun, "; + break; + case 1: + formattedString = "Mon, "; + break; + case 2: + formattedString = "Tue, "; + break; + case 3: + formattedString = "Wed, "; + break; + case 4: + formattedString = "Thu, "; + break; + case 5: + formattedString = "Fri, "; + break; + case 6: + formattedString = "Sat, "; + break; + } + + if (formatTime.day < 10) { + formattedString += "0" + std::to_string(formatTime.day) + " "; + } else { + formattedString += std::to_string(formatTime.day) + " "; + } + + switch (formatTime.month) { + case 1: + formattedString += "Jan "; + break; + case 2: + formattedString += "Feb "; + break; + case 3: + formattedString += "Mar "; + break; + case 4: + formattedString += "Apr "; + break; + case 5: + formattedString += "May "; + break; + case 6: + formattedString += "Jun "; + break; + case 7: + formattedString += "Jul "; + break; + case 8: + formattedString += "Aug "; + break; + case 9: + formattedString += "Sep "; + break; + case 10: + formattedString += "Oct "; + break; + case 11: + formattedString += "Nov "; + break; + case 12: + formattedString += "Dec "; + break; + } + + formattedString += std::to_string(formatTime.year) + " "; + + if (formatTime.hour < 10) { + formattedString += "0" + std::to_string(formatTime.hour) + ":"; + } else { + formattedString += std::to_string(formatTime.hour) + ":"; + } + + if (formatTime.minute < 10) { + formattedString += "0" + std::to_string(formatTime.minute) + ":"; + } else { + formattedString += std::to_string(formatTime.minute) + ":"; + } + + if (formatTime.second < 10) { + formattedString += "0" + std::to_string(formatTime.second) + " "; + } else { + formattedString += std::to_string(formatTime.second) + " "; + } + + if (iTimeZoneMinutes == 0) { + formattedString += "+0000"; + } else { + int timeZoneHours = iTimeZoneMinutes / 60; + int timeZoneRemainder = iTimeZoneMinutes % 60; + + if (timeZoneHours < 0) { + formattedString += "-"; + timeZoneHours *= -1; + } else { + formattedString += "+"; + } + + if (timeZoneHours < 10) { + formattedString += "0" + std::to_string(timeZoneHours); + } else { + formattedString += std::to_string(timeZoneHours); + } + + if (timeZoneRemainder == 0) { + formattedString += "00"; + } else { + if (timeZoneRemainder < 0) + timeZoneRemainder *= -1; + formattedString += std::to_string(timeZoneRemainder); + } + } + + for (int i = 0; i < formattedString.size() + 1; ++i) { + pszDateTime[i] = formattedString.c_str()[i]; + } + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcFormatRFC2822LocalTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcFormatRFC2822LocalTime(char* pszDateTime, const OrbisRtcTick* pTickUtc) { + LOG_TRACE(Lib_Rtc, "called"); + + Kernel::OrbisKernelTimezone timeZone; + Kernel::sceKernelGettimezone(&timeZone); + + return sceRtcFormatRFC2822(pszDateTime, pTickUtc, + -(timeZone.tz_minuteswest - (timeZone.tz_dsttime * 60))); } -int PS4_SYSV_ABI sceRtcFormatRFC3339() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcFormatRFC3339(char* pszDateTime, const OrbisRtcTick* pTickUtc, + int iTimeZoneMinutes) { + LOG_TRACE(Lib_Rtc, "called"); + return sceRtcFormatRFC3339Precise(pszDateTime, pTickUtc, iTimeZoneMinutes); } -int PS4_SYSV_ABI sceRtcFormatRFC3339LocalTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcFormatRFC3339LocalTime(char* pszDateTime, const OrbisRtcTick* pTickUtc) { + LOG_TRACE(Lib_Rtc, "called"); + + Kernel::OrbisKernelTimezone timeZone; + Kernel::sceKernelGettimezone(&timeZone); + + return sceRtcFormatRFC3339(pszDateTime, pTickUtc, + -(timeZone.tz_minuteswest - (timeZone.tz_dsttime * 60))); } -int PS4_SYSV_ABI sceRtcFormatRFC3339Precise() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcFormatRFC3339Precise(char* pszDateTime, const OrbisRtcTick* pTickUtc, + int iTimeZoneMinutes) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pszDateTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + OrbisRtcTick formatTick; + + if (pTickUtc == nullptr) { + sceRtcGetCurrentTick(&formatTick); + } else { + formatTick.tick = pTickUtc->tick; + } + + sceRtcTickAddMinutes(&formatTick, &formatTick, iTimeZoneMinutes); + + OrbisRtcDateTime formatTime; + + sceRtcSetTick(&formatTime, &formatTick); + + std::string formattedString; + formattedString = std::to_string(formatTime.year) + "-"; + + if (formatTime.month < 10) { + formattedString += "0" + std::to_string(formatTime.month) + "-"; + } else { + formattedString += std::to_string(formatTime.month) + "-"; + } + + if (formatTime.day < 10) { + formattedString += "0" + std::to_string(formatTime.day) + "T"; + } else { + formattedString += std::to_string(formatTime.day) + "T"; + } + + if (formatTime.hour < 10) { + formattedString += "0" + std::to_string(formatTime.hour) + ":"; + } else { + formattedString += std::to_string(formatTime.hour) + ":"; + } + + if (formatTime.minute < 10) { + formattedString += "0" + std::to_string(formatTime.minute) + ":"; + } else { + formattedString += std::to_string(formatTime.minute) + ":"; + } + + if (formatTime.second < 10) { + formattedString += "0" + std::to_string(formatTime.second); + } else { + formattedString += std::to_string(formatTime.second); + } + + if (formatTime.microsecond != 0) { + formattedString += "." + std::to_string(formatTime.microsecond / 1000).substr(0, 2); + } else { + formattedString += ".00"; + } + + if (iTimeZoneMinutes == 0) { + formattedString += "Z"; + } else { + int timeZoneHours = iTimeZoneMinutes / 60; + int timeZoneRemainder = iTimeZoneMinutes % 60; + + if (timeZoneHours < 0) { + formattedString += "-"; + timeZoneHours *= -1; + } else { + formattedString += "+"; + } + + if (timeZoneHours < 10) { + formattedString += "0" + std::to_string(timeZoneHours); + } else { + formattedString += std::to_string(timeZoneHours); + } + + if (timeZoneRemainder == 0) { + formattedString += ":00"; + } else { + if (timeZoneRemainder < 0) + timeZoneRemainder *= -1; + formattedString += ":" + std::to_string(timeZoneRemainder); + } + } + + for (int i = 0; i < formattedString.size() + 1; ++i) { + pszDateTime[i] = formattedString.c_str()[i]; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcFormatRFC3339PreciseLocalTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcFormatRFC3339PreciseLocalTime(char* pszDateTime, + const OrbisRtcTick* pTickUtc) { + LOG_TRACE(Lib_Rtc, "called"); + + Kernel::OrbisKernelTimezone timeZone; + Kernel::sceKernelGettimezone(&timeZone); + + return sceRtcFormatRFC3339Precise(pszDateTime, pTickUtc, + -(timeZone.tz_minuteswest - (timeZone.tz_dsttime * 60))); } -int PS4_SYSV_ABI sceRtcGetCurrentAdNetworkTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetCurrentAdNetworkTick(OrbisRtcTick* pTick) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + Kernel::OrbisKernelTimespec clocktime; + int returnValue = Kernel::sceKernelClockGettime(Kernel::ORBIS_CLOCK_REALTIME, &clocktime); + + if (returnValue == SCE_OK) { + pTick->tick = clocktime.tv_nsec / 1000 + clocktime.tv_sec * 1000000 + UNIX_EPOCH_TICKS; + } else { + return ORBIS_RTC_ERROR_NOT_INITIALIZED; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetCurrentClock() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetCurrentClock(OrbisRtcDateTime* pTime, int timeZone) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr) + return ORBIS_RTC_ERROR_DATETIME_UNINITIALIZED; + + Kernel::OrbisKernelTimespec clocktime; + int returnValue = Kernel::sceKernelClockGettime(Kernel::ORBIS_CLOCK_REALTIME, &clocktime); + + if (returnValue == SCE_OK) { + OrbisRtcTick clockTick; + clockTick.tick = clocktime.tv_nsec / 1000 + clocktime.tv_sec * 1000000 + UNIX_EPOCH_TICKS; + + sceRtcTickAddMinutes(&clockTick, &clockTick, timeZone); + sceRtcSetTick(pTime, &clockTick); + } + + return returnValue; } -int PS4_SYSV_ABI sceRtcGetCurrentClockLocalTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetCurrentClockLocalTime(OrbisRtcDateTime* pTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr) + return ORBIS_RTC_ERROR_DATETIME_UNINITIALIZED; + + Kernel::OrbisKernelTimezone timeZone; + int returnValue = Kernel::sceKernelGettimezone(&timeZone); + + if (returnValue >= 0) { + Kernel::OrbisKernelTimespec clocktime; + + // calculate total timezone offset for converting UTC to local time + uint64_t tzOffset = -(timeZone.tz_minuteswest - (timeZone.tz_dsttime * 60)); + + if (returnValue >= 0) { + OrbisRtcTick newTick; + sceRtcGetCurrentTick(&newTick); + sceRtcTickAddMinutes(&newTick, &newTick, tzOffset); + sceRtcSetTick(pTime, &newTick); + } + } + + return returnValue; } -int PS4_SYSV_ABI sceRtcGetCurrentDebugNetworkTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetCurrentDebugNetworkTick(OrbisRtcTick* pTick) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + Kernel::OrbisKernelTimespec clocktime; + int returnValue = Kernel::sceKernelClockGettime(Kernel::ORBIS_CLOCK_REALTIME, &clocktime); + + if (returnValue == SCE_OK) { + pTick->tick = clocktime.tv_nsec / 1000 + clocktime.tv_sec * 1000000 + UNIX_EPOCH_TICKS; + } else { + return ORBIS_RTC_ERROR_NOT_INITIALIZED; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetCurrentNetworkTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetCurrentNetworkTick(OrbisRtcTick* pTick) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + Kernel::OrbisKernelTimespec clocktime; + int returnValue = Kernel::sceKernelClockGettime(Kernel::ORBIS_CLOCK_REALTIME, &clocktime); + + if (returnValue == SCE_OK) { + pTick->tick = clocktime.tv_nsec / 1000 + clocktime.tv_sec * 1000000 + UNIX_EPOCH_TICKS; + } else { + return ORBIS_RTC_ERROR_NOT_INITIALIZED; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetCurrentRawNetworkTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetCurrentRawNetworkTick(OrbisRtcTick* pTick) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + Kernel::OrbisKernelTimespec clocktime; + int returnValue = Kernel::sceKernelClockGettime(Kernel::ORBIS_CLOCK_REALTIME, &clocktime); + + if (returnValue == SCE_OK) { + pTick->tick = clocktime.tv_nsec / 1000 + clocktime.tv_sec * 1000000 + UNIX_EPOCH_TICKS; + } else { + return ORBIS_RTC_ERROR_NOT_INITIALIZED; + } + + return SCE_OK; } int PS4_SYSV_ABI sceRtcGetCurrentTick(OrbisRtcTick* pTick) { - pTick->tick = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch()) - .count(); - return ORBIS_OK; + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick == nullptr) + return ORBIS_RTC_ERROR_DATETIME_UNINITIALIZED; + + Kernel::OrbisKernelTimespec clocktime; + int returnValue = Kernel::sceKernelClockGettime(Kernel::ORBIS_CLOCK_REALTIME, &clocktime); + + if (returnValue >= 0) { + pTick->tick = clocktime.tv_nsec / 1000 + clocktime.tv_sec * 1000000 + UNIX_EPOCH_TICKS; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetDayOfWeek() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetDayOfWeek(int year, int month, int day) { + LOG_TRACE(Lib_Rtc, "called"); + + int sdk_version = 0; + int sdkResult = Kernel::sceKernelGetCompiledSdkVersion(&sdk_version); + if (sdkResult != ORBIS_OK) { + sdk_version = 0; + } + + if (sdk_version < 0x3000000) { + if (year < 1) { + return ORBIS_RTC_ERROR_INVALID_YEAR; + } + if (month > 12 || month <= 0) { + return ORBIS_RTC_ERROR_INVALID_MONTH; + } + } else { + if (year > 9999 || year < 1) { + return ORBIS_RTC_ERROR_INVALID_YEAR; + } + if (month > 12 || month <= 0) { + return ORBIS_RTC_ERROR_INVALID_MONTH; + } + } + + int daysInMonth = sceRtcGetDaysInMonth(year, month); + + if (day <= 0 || day > daysInMonth) + return ORBIS_RTC_ERROR_INVALID_DAY; + + std::chrono::sys_days chrono_time{std::chrono::year(year) / std::chrono::month(month) / + std::chrono::day(day)}; + std::chrono::weekday chrono_weekday{chrono_time}; + + return chrono_weekday.c_encoding(); } -int PS4_SYSV_ABI sceRtcGetDaysInMonth() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetDaysInMonth(int year, int month) { + LOG_TRACE(Lib_Rtc, "called"); + + if (year <= 0) + return ORBIS_RTC_ERROR_INVALID_YEAR; + + if (month <= 0 || month > 12) + return ORBIS_RTC_ERROR_INVALID_MONTH; + + std::chrono::year chronoYear = std::chrono::year(year); + std::chrono::month chronoMonth = std::chrono::month(month); + int lastDay = static_cast(unsigned( + std::chrono::year_month_day_last{chronoYear / chronoMonth / std::chrono::last}.day())); + + return lastDay; } -int PS4_SYSV_ABI sceRtcGetDosTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetDosTime(OrbisRtcDateTime* pTime, unsigned int* dosTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr || dosTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + int isValid = sceRtcCheckValid(pTime); + if (isValid != SCE_OK) { + return isValid; + } + + *dosTime |= (pTime->second / 2) & 0x1F; + *dosTime |= (pTime->minute & 0x3F) << 5; + *dosTime |= (pTime->hour & 0x1F) << 11; + *dosTime |= (pTime->day & 0x1F) << 16; + *dosTime |= (pTime->month & 0x0F) << 21; + *dosTime |= ((pTime->year - 1980) & 0x7F) << 25; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetTick(OrbisRtcDateTime* pTime, OrbisRtcTick* pTick) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr || pTick == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + int isTimeValid = sceRtcCheckValid(pTime); + if (isTimeValid != 0) + return isTimeValid; + + if (pTime->month > 2) { + pTime->month -= 3; + } else { + pTime->month += 9; + pTime->year -= 1; + } + + int c = pTime->year / 100; + int ya = pTime->year - 100 * c; + + u64 days; + u64 msec; + + days = ((146097 * c) >> 2) + ((1461 * ya) >> 2) + (153 * pTime->month + 2) / 5 + pTime->day; + days -= 307; + days *= 86400000000; + + msec = pTime->hour * 3600000000 + pTime->minute * 60000000 + pTime->second * 1000000 + + pTime->microsecond; + + pTick->tick = days + msec; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetTickResolution() { +unsigned int PS4_SYSV_ABI sceRtcGetTickResolution() { + LOG_TRACE(Lib_Rtc, "called"); + return 1000000; } -int PS4_SYSV_ABI sceRtcGetTime_t() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetTime_t(OrbisRtcDateTime* pTime, time_t* llTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr || llTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + int isValid = sceRtcCheckValid(pTime); + if (isValid != SCE_OK) { + return isValid; + } + + OrbisRtcTick timeTick; + sceRtcGetTick(pTime, &timeTick); + + if (timeTick.tick < UNIX_EPOCH_TICKS) { + *llTime = 0; + } else { + *llTime = (timeTick.tick - UNIX_EPOCH_TICKS) / 1000000; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcGetWin32FileTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcGetWin32FileTime(OrbisRtcDateTime* pTime, uint64_t* ulWin32Time) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr || ulWin32Time == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + int isValid = sceRtcCheckValid(pTime); + if (isValid != SCE_OK) { + return isValid; + } + + OrbisRtcTick timeTick; + sceRtcGetTick(pTime, &timeTick); + + if (timeTick.tick < WIN32_FILETIME_EPOCH_TICKS) { + *ulWin32Time = 0; + } else { + *ulWin32Time = (timeTick.tick - WIN32_FILETIME_EPOCH_TICKS) * 10; + } + + return SCE_OK; } int PS4_SYSV_ABI sceRtcInit() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; + return SCE_OK; } -int PS4_SYSV_ABI sceRtcIsLeapYear() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcIsLeapYear(int yearInt) { + LOG_TRACE(Lib_Rtc, "called"); + + if (yearInt < 1) + return ORBIS_RTC_ERROR_INVALID_YEAR; + + using namespace std::chrono; + + year_month_day_last ymdl{year(yearInt) / February / last}; + return (ymdl.day() == 29d); } -int PS4_SYSV_ABI sceRtcParseDateTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int GetMonthFromString(std::string monthStr) { + if (monthStr == "Jan") + return 1; + + if (monthStr == "Feb") + return 2; + + if (monthStr == "Mar") + return 3; + + if (monthStr == "Apr") + return 4; + + if (monthStr == "May") + return 5; + + if (monthStr == "Jun") + return 6; + + if (monthStr == "Jul") + return 7; + + if (monthStr == "Aug") + return 8; + + if (monthStr == "Sep") + return 9; + + if (monthStr == "Oct") + return 10; + + if (monthStr == "Nov") + return 11; + + if (monthStr == "Dec") + return 12; + + return 1; } -int PS4_SYSV_ABI sceRtcParseRFC3339() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcParseDateTime(OrbisRtcTick* pTickUtc, const char* pszDateTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTickUtc == nullptr || pszDateTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + std::string dateTimeString = std::string(pszDateTime); + + char formatKey = dateTimeString[22]; + OrbisRtcDateTime dateTime; + + if (formatKey == 'Z' || formatKey == '-' || formatKey == '+') { + // RFC3339 + sceRtcParseRFC3339(pTickUtc, pszDateTime); + } else if (formatKey == ':') { + // RFC2822 + dateTime.day = std::stoi(dateTimeString.substr(5, 2)); + dateTime.month = GetMonthFromString(dateTimeString.substr(8, 3)); + dateTime.year = std::stoi(dateTimeString.substr(12, 4)); + dateTime.hour = std::stoi(dateTimeString.substr(17, 2)); + dateTime.minute = std::stoi(dateTimeString.substr(20, 2)); + dateTime.second = std::stoi(dateTimeString.substr(23, 2)); + dateTime.microsecond = 0; + + sceRtcGetTick(&dateTime, pTickUtc); + + if (dateTimeString[26] == '+') { + int timeZoneOffset = std::stoi(dateTimeString.substr(27, 2)) * 60; + timeZoneOffset += std::stoi(dateTimeString.substr(29, 2)); + sceRtcTickAddMinutes(pTickUtc, pTickUtc, timeZoneOffset); + } else if (dateTimeString[26] == '-') { + int timeZoneOffset = std::stoi(dateTimeString.substr(27, 2)) * 60; + timeZoneOffset += std::stoi(dateTimeString.substr(29, 2)); + timeZoneOffset *= -1; + sceRtcTickAddMinutes(pTickUtc, pTickUtc, timeZoneOffset); + } + + } else { + // asctime + dateTime.month = GetMonthFromString(dateTimeString.substr(4, 3)); + dateTime.day = std::stoi(dateTimeString.substr(8, 2)); + dateTime.hour = std::stoi(dateTimeString.substr(11, 2)); + dateTime.minute = std::stoi(dateTimeString.substr(14, 2)); + dateTime.second = std::stoi(dateTimeString.substr(17, 2)); + dateTime.year = std::stoi(dateTimeString.substr(20, 4)); + dateTime.microsecond = 0; + + sceRtcGetTick(&dateTime, pTickUtc); + } + + return SCE_OK; +} + +int PS4_SYSV_ABI sceRtcParseRFC3339(OrbisRtcTick* pTickUtc, const char* pszDateTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTickUtc == nullptr || pszDateTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + std::string dateTimeString = std::string(pszDateTime); + + OrbisRtcDateTime dateTime; + dateTime.year = std::stoi(dateTimeString.substr(0, 4)); + dateTime.month = std::stoi(dateTimeString.substr(5, 2)); + dateTime.day = std::stoi(dateTimeString.substr(8, 2)); + dateTime.hour = std::stoi(dateTimeString.substr(11, 2)); + dateTime.minute = std::stoi(dateTimeString.substr(14, 2)); + dateTime.second = std::stoi(dateTimeString.substr(17, 2)); + dateTime.microsecond = std::stoi(dateTimeString.substr(20, 2)); + + sceRtcGetTick(&dateTime, pTickUtc); + + if (dateTimeString[22] != 'Z') { + if (dateTimeString[22] == '-') { + int timeZoneOffset = std::stoi(dateTimeString.substr(23, 2)) * 60; + timeZoneOffset += std::stoi(dateTimeString.substr(26, 2)); + timeZoneOffset *= -1; + sceRtcTickAddMinutes(pTickUtc, pTickUtc, timeZoneOffset); + } else if (dateTimeString[22] == '+') { + int timeZoneOffset = std::stoi(dateTimeString.substr(23, 2)) * 60; + timeZoneOffset += std::stoi(dateTimeString.substr(26, 2)); + sceRtcTickAddMinutes(pTickUtc, pTickUtc, timeZoneOffset); + } + } + + return SCE_OK; } int PS4_SYSV_ABI sceRtcSetConf() { @@ -162,89 +833,294 @@ int PS4_SYSV_ABI sceRtcSetConf() { return ORBIS_OK; } -int PS4_SYSV_ABI sceRtcSetCurrentAdNetworkTick() { +int PS4_SYSV_ABI sceRtcSetCurrentAdNetworkTick(OrbisRtcTick* pTick) { LOG_ERROR(Lib_Rtc, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceRtcSetCurrentDebugNetworkTick() { +int PS4_SYSV_ABI sceRtcSetCurrentDebugNetworkTick(OrbisRtcTick* pTick) { LOG_ERROR(Lib_Rtc, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceRtcSetCurrentNetworkTick() { +int PS4_SYSV_ABI sceRtcSetCurrentNetworkTick(OrbisRtcTick* pTick) { LOG_ERROR(Lib_Rtc, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceRtcSetCurrentTick() { +int PS4_SYSV_ABI sceRtcSetCurrentTick(OrbisRtcTick* pTick) { LOG_ERROR(Lib_Rtc, "(STUBBED) called"); return ORBIS_OK; } -int PS4_SYSV_ABI sceRtcSetDosTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcSetDosTime(OrbisRtcDateTime* pTime, u32 dosTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTime->microsecond = 0; + pTime->second = (dosTime << 1) & 0x3e; + pTime->minute = (dosTime >> 5) & 0x3f; + pTime->hour = (dosTime & 0xf800) >> 0xb; + + int16_t days = dosTime >> 0x10; + + pTime->day = days & 0x1f; + pTime->month = (days >> 5) & 0x0f; + pTime->year = (days >> 9) + 1980; + return SCE_OK; } -int PS4_SYSV_ABI sceRtcSetTick() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcSetTick(OrbisRtcDateTime* pTime, OrbisRtcTick* pTick) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr || pTick == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + u32 ly, ld, lm, j; + u64 days, msec; + + days = pTick->tick / 86400000000; + msec = pTick->tick % 86400000000; + + days += 307; + + j = (days << 2) - 1; + ly = j / 146097; + + j -= (146097 * ly); + ld = j >> 2; + + j = ((ld << 2) + 3) / 1461; + ld = (((ld << 2) + 7) - 1461 * j) >> 2; + + lm = (5 * ld - 3) / 153; + ld = (5 * ld + 2 - 153 * lm) / 5; + ly = 100 * ly + j; + + if (lm < 10) { + lm += 3; + } else { + lm -= 9; + ly++; + } + + pTime->year = ly; + pTime->month = lm; + pTime->day = ld; + + pTime->hour = msec / 3600000000; + msec %= 3600000000; + pTime->minute = msec / 60000000; + msec %= 60000000; + pTime->second = msec / 1000000; + msec %= 1000000; + pTime->microsecond = msec; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcSetTime_t() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcSetTime_t(OrbisRtcDateTime* pTime, time_t llTime) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + int sdk_version; + int sdkResult = Kernel::sceKernelGetCompiledSdkVersion(&sdk_version); + if (sdkResult != ORBIS_OK) { + sdk_version = 0; + } + + OrbisRtcTick newTick; + if (sdk_version < 0x3000000) { + newTick.tick = (llTime & 0xffffffff) * 1000000; + } else { + if (llTime < 0) { + return ORBIS_RTC_ERROR_INVALID_VALUE; + } + newTick.tick = llTime * 1000000; + } + + newTick.tick += UNIX_EPOCH_TICKS; + sceRtcSetTick(pTime, &newTick); + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcSetWin32FileTime() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcSetWin32FileTime(OrbisRtcDateTime* pTime, int64_t ulWin32Time) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTime == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + u64 convertedTime = (ulWin32Time / 10) + WIN32_FILETIME_EPOCH_TICKS; + + OrbisRtcTick convertedTick; + convertedTick.tick = convertedTime; + + sceRtcSetTick(pTime, &convertedTick); + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddDays() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddDays(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = (lAdd * 86400000000) + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddHours() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddHours(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = (lAdd * 3600000000) + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddMicroseconds() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddMicroseconds(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, + int64_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = lAdd + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddMinutes() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddMinutes(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = (lAdd * 60000000) + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddMonths() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddMonths(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + if (lAdd == 0) { + pTick1->tick = pTick2->tick; + return SCE_OK; + } + + OrbisRtcDateTime time; + s64 s; + s64 tempMonth; + + sceRtcSetTick(&time, pTick1); + + if (lAdd >= 0) { + s = 1; + } else { + s = -1; + } + + time.year += (lAdd / 12); + tempMonth = time.month + (lAdd % 12) - 1; + + if (tempMonth > 11 || tempMonth < 0) { + tempMonth -= s * 12; + time.year += s; + } + + time.month = tempMonth + 1; + + using namespace std::chrono; + year chronoYear = year(time.year); + month chronoMonth = month(time.month); + int lastDay = + static_cast(unsigned(year_month_day_last{chronoYear / chronoMonth / last}.day())); + + if (time.day > lastDay) { + time.day = lastDay; + } + + int timeIsValid = sceRtcCheckValid(&time); + if (timeIsValid == SCE_OK) { + sceRtcGetTick(&time, pTick1); + } else { + return timeIsValid; + } + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddSeconds() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddSeconds(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = (lAdd * 1000000) + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddTicks() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddTicks(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = lAdd + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddWeeks() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddWeeks(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + pTick1->tick = (lAdd * 604800000000) + pTick2->tick; + + return SCE_OK; } -int PS4_SYSV_ABI sceRtcTickAddYears() { - LOG_ERROR(Lib_Rtc, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceRtcTickAddYears(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd) { + LOG_TRACE(Lib_Rtc, "called"); + + if (pTick1 == nullptr || pTick2 == nullptr) + return ORBIS_RTC_ERROR_INVALID_POINTER; + + OrbisRtcDateTime time; + + if (lAdd == 0) { + pTick1->tick = pTick2->tick; + return SCE_OK; + } + + sceRtcSetTick(&time, pTick1); + + time.year += lAdd; + + int timeIsValid = sceRtcCheckValid(&time); + if (timeIsValid == SCE_OK) { + sceRtcGetTick(&time, pTick1); + } else { + return timeIsValid; + } + + return SCE_OK; } void RegisterlibSceRtc(Core::Loader::SymbolsResolver* sym) { diff --git a/src/core/libraries/rtc/rtc.h b/src/core/libraries/rtc/rtc.h index ee6afa70e..c41040863 100644 --- a/src/core/libraries/rtc/rtc.h +++ b/src/core/libraries/rtc/rtc.h @@ -11,57 +11,81 @@ class SymbolsResolver; namespace Libraries::Rtc { +constexpr int ORBIS_RTC_DAYOFWEEK_SUNDAY = 0; +constexpr int ORBIS_RTC_DAYOFWEEK_MONDAY = 1; +constexpr int ORBIS_RTC_DAYOFWEEK_TUESDAY = 2; +constexpr int ORBIS_RTC_DAYOFWEEK_WEDNESDAY = 3; +constexpr int ORBIS_RTC_DAYOFWEEK_THURSDAY = 4; +constexpr int ORBIS_RTC_DAYOFWEEK_FRIDAY = 5; +constexpr int ORBIS_RTC_DAYOFWEEK_SATURDAY = 6; + +constexpr int64_t UNIX_EPOCH_TICKS = 0xdcbffeff2bc000; +constexpr int64_t WIN32_FILETIME_EPOCH_TICKS = 0xb36168b6a58000; + struct OrbisRtcTick { - u64 tick; + uint64_t tick; }; -int PS4_SYSV_ABI sceRtcCheckValid(); -int PS4_SYSV_ABI sceRtcCompareTick(); -int PS4_SYSV_ABI sceRtcConvertLocalTimeToUtc(); -int PS4_SYSV_ABI sceRtcConvertUtcToLocalTime(); +struct OrbisRtcDateTime { + uint16_t year; + uint16_t month; + uint16_t day; + uint16_t hour; + uint16_t minute; + uint16_t second; + uint32_t microsecond; +}; + +int PS4_SYSV_ABI sceRtcCheckValid(OrbisRtcDateTime* pTime); +int PS4_SYSV_ABI sceRtcCompareTick(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2); +int PS4_SYSV_ABI sceRtcConvertLocalTimeToUtc(OrbisRtcTick* pTickLocal, OrbisRtcTick* pTickUtc); +int PS4_SYSV_ABI sceRtcConvertUtcToLocalTime(OrbisRtcTick* pTickUtc, OrbisRtcTick* pTickLocal); int PS4_SYSV_ABI sceRtcEnd(); -int PS4_SYSV_ABI sceRtcFormatRFC2822(); -int PS4_SYSV_ABI sceRtcFormatRFC2822LocalTime(); -int PS4_SYSV_ABI sceRtcFormatRFC3339(); -int PS4_SYSV_ABI sceRtcFormatRFC3339LocalTime(); -int PS4_SYSV_ABI sceRtcFormatRFC3339Precise(); -int PS4_SYSV_ABI sceRtcFormatRFC3339PreciseLocalTime(); -int PS4_SYSV_ABI sceRtcGetCurrentAdNetworkTick(); -int PS4_SYSV_ABI sceRtcGetCurrentClock(); -int PS4_SYSV_ABI sceRtcGetCurrentClockLocalTime(); -int PS4_SYSV_ABI sceRtcGetCurrentDebugNetworkTick(); -int PS4_SYSV_ABI sceRtcGetCurrentNetworkTick(); -int PS4_SYSV_ABI sceRtcGetCurrentRawNetworkTick(); +int PS4_SYSV_ABI sceRtcFormatRFC2822(char* pszDateTime, const OrbisRtcTick* pTickUtc, int minutes); +int PS4_SYSV_ABI sceRtcFormatRFC2822LocalTime(char* pszDateTime, const OrbisRtcTick* pTickUtc); +int PS4_SYSV_ABI sceRtcFormatRFC3339(char* pszDateTime, const OrbisRtcTick* pTickUtc, int minutes); +int PS4_SYSV_ABI sceRtcFormatRFC3339LocalTime(char* pszDateTime, const OrbisRtcTick* pTickUtc); +int PS4_SYSV_ABI sceRtcFormatRFC3339Precise(char* pszDateTime, const OrbisRtcTick* pTickUtc, + int minutes); +int PS4_SYSV_ABI sceRtcFormatRFC3339PreciseLocalTime(char* pszDateTime, + const OrbisRtcTick* pTickUtc); +int PS4_SYSV_ABI sceRtcGetCurrentAdNetworkTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcGetCurrentClock(OrbisRtcDateTime* pTime, int timeZone); +int PS4_SYSV_ABI sceRtcGetCurrentClockLocalTime(OrbisRtcDateTime* pTime); +int PS4_SYSV_ABI sceRtcGetCurrentDebugNetworkTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcGetCurrentNetworkTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcGetCurrentRawNetworkTick(OrbisRtcTick* pTick); int PS4_SYSV_ABI sceRtcGetCurrentTick(OrbisRtcTick* pTick); -int PS4_SYSV_ABI sceRtcGetDayOfWeek(); -int PS4_SYSV_ABI sceRtcGetDaysInMonth(); -int PS4_SYSV_ABI sceRtcGetDosTime(); -int PS4_SYSV_ABI sceRtcGetTick(); -int PS4_SYSV_ABI sceRtcGetTickResolution(); -int PS4_SYSV_ABI sceRtcGetTime_t(); -int PS4_SYSV_ABI sceRtcGetWin32FileTime(); +int PS4_SYSV_ABI sceRtcGetDayOfWeek(int year, int month, int day); +int PS4_SYSV_ABI sceRtcGetDaysInMonth(int year, int month); +int PS4_SYSV_ABI sceRtcGetDosTime(OrbisRtcDateTime* pTime, unsigned int* dosTime); +int PS4_SYSV_ABI sceRtcGetTick(OrbisRtcDateTime* pTime, OrbisRtcTick* pTick); +unsigned int PS4_SYSV_ABI sceRtcGetTickResolution(); +int PS4_SYSV_ABI sceRtcGetTime_t(OrbisRtcDateTime* pTime, time_t* llTime); +int PS4_SYSV_ABI sceRtcGetWin32FileTime(OrbisRtcDateTime* pTime, uint64_t* ulWin32Time); int PS4_SYSV_ABI sceRtcInit(); -int PS4_SYSV_ABI sceRtcIsLeapYear(); -int PS4_SYSV_ABI sceRtcParseDateTime(); -int PS4_SYSV_ABI sceRtcParseRFC3339(); +int PS4_SYSV_ABI sceRtcIsLeapYear(int yearInt); +int PS4_SYSV_ABI sceRtcParseDateTime(OrbisRtcTick* pTickUtc, const char* pszDateTime); +int PS4_SYSV_ABI sceRtcParseRFC3339(OrbisRtcTick* pTickUtc, const char* pszDateTime); int PS4_SYSV_ABI sceRtcSetConf(); -int PS4_SYSV_ABI sceRtcSetCurrentAdNetworkTick(); -int PS4_SYSV_ABI sceRtcSetCurrentDebugNetworkTick(); -int PS4_SYSV_ABI sceRtcSetCurrentNetworkTick(); -int PS4_SYSV_ABI sceRtcSetCurrentTick(); -int PS4_SYSV_ABI sceRtcSetDosTime(); -int PS4_SYSV_ABI sceRtcSetTick(); -int PS4_SYSV_ABI sceRtcSetTime_t(); -int PS4_SYSV_ABI sceRtcSetWin32FileTime(); -int PS4_SYSV_ABI sceRtcTickAddDays(); -int PS4_SYSV_ABI sceRtcTickAddHours(); -int PS4_SYSV_ABI sceRtcTickAddMicroseconds(); -int PS4_SYSV_ABI sceRtcTickAddMinutes(); -int PS4_SYSV_ABI sceRtcTickAddMonths(); -int PS4_SYSV_ABI sceRtcTickAddSeconds(); -int PS4_SYSV_ABI sceRtcTickAddTicks(); -int PS4_SYSV_ABI sceRtcTickAddWeeks(); -int PS4_SYSV_ABI sceRtcTickAddYears(); +int PS4_SYSV_ABI sceRtcSetCurrentAdNetworkTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcSetCurrentDebugNetworkTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcSetCurrentNetworkTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcSetCurrentTick(OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcSetDosTime(OrbisRtcDateTime* pTime, u32 dosTime); +int PS4_SYSV_ABI sceRtcSetTick(OrbisRtcDateTime* pTime, OrbisRtcTick* pTick); +int PS4_SYSV_ABI sceRtcSetTime_t(OrbisRtcDateTime* pTime, time_t llTime); +int PS4_SYSV_ABI sceRtcSetWin32FileTime(OrbisRtcDateTime* pTime, int64_t ulWin32Time); +int PS4_SYSV_ABI sceRtcTickAddDays(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddHours(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddMicroseconds(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, + int64_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddMinutes(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddMonths(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddSeconds(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddTicks(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int64_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddWeeks(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd); +int PS4_SYSV_ABI sceRtcTickAddYears(OrbisRtcTick* pTick1, OrbisRtcTick* pTick2, int32_t lAdd); void RegisterlibSceRtc(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::Rtc \ No newline at end of file diff --git a/src/core/libraries/rtc/rtc_error.h b/src/core/libraries/rtc/rtc_error.h index 04eecbbdf..3af5a68fd 100644 --- a/src/core/libraries/rtc/rtc_error.h +++ b/src/core/libraries/rtc/rtc_error.h @@ -3,6 +3,7 @@ #pragma once +constexpr int ORBIS_RTC_ERROR_DATETIME_UNINITIALIZED = 0x7FFEF9FE; constexpr int ORBIS_RTC_ERROR_INVALID_PARAMETER = 0x80010602; constexpr int ORBIS_RTC_ERROR_INVALID_TICK_PARAMETER = 0x80010603; constexpr int ORBIS_RTC_ERROR_INVALID_DATE_PARAMETER = 0x80010604; @@ -14,4 +15,18 @@ constexpr int ORBIS_RTC_ERROR_INVALID_DAYS_PARAMETER = 0x80010623; constexpr int ORBIS_RTC_ERROR_INVALID_HOURS_PARAMETER = 0x80010624; constexpr int ORBIS_RTC_ERROR_INVALID_MINUTES_PARAMETER = 0x80010625; constexpr int ORBIS_RTC_ERROR_INVALID_SECONDS_PARAMETER = 0x80010626; -constexpr int ORBIS_RTC_ERROR_INVALID_MILLISECONDS_PARAMETER = 0x80010627; \ No newline at end of file +constexpr int ORBIS_RTC_ERROR_INVALID_MILLISECONDS_PARAMETER = 0x80010627; +constexpr int ORBIS_RTC_ERROR_NOT_INITIALIZED = 0x80B50001; +constexpr int ORBIS_RTC_ERROR_INVALID_POINTER = 0x80B50002; +constexpr int ORBIS_RTC_ERROR_INVALID_VALUE = 0x80B50003; +constexpr int ORBIS_RTC_ERROR_INVALID_ARG = 0x80B50004; +constexpr int ORBIS_RTC_ERROR_NOT_SUPPORTED = 0x80B50005; +constexpr int ORBIS_RTC_ERROR_NO_CLOCK = 0x80B50006; +constexpr int ORBIS_RTC_ERROR_BAD_PARSE = 0x80B50007; +constexpr int ORBIS_RTC_ERROR_INVALID_YEAR = 0x80B50008; +constexpr int ORBIS_RTC_ERROR_INVALID_MONTH = 0x80B50009; +constexpr int ORBIS_RTC_ERROR_INVALID_DAY = 0x80B5000A; +constexpr int ORBIS_RTC_ERROR_INVALID_HOUR = 0x80B5000B; +constexpr int ORBIS_RTC_ERROR_INVALID_MINUTE = 0x80B5000C; +constexpr int ORBIS_RTC_ERROR_INVALID_SECOND = 0x80B5000D; +constexpr int ORBIS_RTC_ERROR_INVALID_MICROSECOND = 0x80B5000E; \ No newline at end of file From 13743b27fc942a89999fce9d359ac645584f7a54 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sat, 7 Sep 2024 00:14:51 +0300 Subject: [PATCH 06/11] shader_recompiler: Implement data share append and consume operations (#814) * shader_recompiler: Add more format swap modes * texture_cache: Handle stencil texture reads * emulator: Support loading font library * readme: Add thanks section * shader_recompiler: Constant buffers as integers * shader_recompiler: Typed buffers as integers * shader_recompiler: Separate thread bit scalars * We can assume guest shader never mixes them with normal sgprs. This helps avoid errors where ssa could view an sgpr write dominating a thread bit read, due to how control flow is structurized, even though its not possible in actual control flow * shader_recompiler: Implement data append/consume operations * clang format * buffer_cache: Simplify invalidation scheme * video_core: Remove some invalidation remnants * adjust --- README.md | 14 ++ src/emulator.cpp | 5 +- .../backend/spirv/emit_spirv_atomic.cpp | 16 +++ .../spirv/emit_spirv_context_get_set.cpp | 60 ++++----- .../backend/spirv/emit_spirv_instructions.h | 28 ++-- .../frontend/translate/data_share.cpp | 18 +++ .../frontend/translate/export.cpp | 6 + .../frontend/translate/scalar_alu.cpp | 16 ++- .../frontend/translate/translate.h | 8 +- .../frontend/translate/vector_alu.cpp | 35 +++-- .../frontend/translate/vector_memory.cpp | 33 ++--- src/shader_recompiler/info.h | 3 +- src/shader_recompiler/ir/basic_block.h | 1 + src/shader_recompiler/ir/ir_emitter.cpp | 29 ++-- src/shader_recompiler/ir/ir_emitter.h | 4 +- src/shader_recompiler/ir/microinstruction.cpp | 11 +- src/shader_recompiler/ir/opcodes.inc | 27 ++-- .../ir/passes/resource_tracking_pass.cpp | 97 ++++++++++---- .../ir/passes/ssa_rewrite_pass.cpp | 46 +++++-- src/video_core/amdgpu/liverpool.cpp | 11 ++ src/video_core/amdgpu/pm4_cmds.h | 23 +++- src/video_core/buffer_cache/buffer_cache.cpp | 124 ++++++++++-------- src/video_core/buffer_cache/buffer_cache.h | 9 ++ .../renderer_vulkan/liverpool_to_vk.cpp | 19 ++- .../renderer_vulkan/vk_compute_pipeline.cpp | 63 +++++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 10 ++ .../renderer_vulkan/vk_rasterizer.cpp | 15 +++ .../renderer_vulkan/vk_rasterizer.h | 3 + src/video_core/texture_cache/image.h | 1 - src/video_core/texture_cache/image_info.cpp | 2 +- src/video_core/texture_cache/image_view.cpp | 4 + .../texture_cache/texture_cache.cpp | 38 +++--- src/video_core/texture_cache/texture_cache.h | 3 + 34 files changed, 512 insertions(+), 272 deletions(-) diff --git a/README.md b/README.md index db898e565..5cf307e33 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,20 @@ Open a PR and we'll check it :) + +# Special Thanks + +A few noteworthy teams/projects who've helped us along the way are: + +- [**Panda3DS**](https://github.com/wheremyfoodat/Panda3DS): A multiplatform 3DS emulator from our co-author wheremyfoodat. They have been incredibly helpful in understanding and solving problems that came up from natively executing the x64 code of PS4 binaries + +- [**fpPS4**](https://github.com/red-prig/fpPS4): The fpPS4 team has assisted massively with understanding some of the more complex parts of the PS4 operating system and libraries, by helping with reverse engineering work and research. + +- **yuzu**: Our shader compiler has been designed with yuzu's Hades compiler as a blueprint. This allowed us to focus on the challenges of emulating a modern AMD GPU while having a high-quality optimizing shader compiler implementation as a base. + +- [**hydra**](https://github.com/hydra-emu/hydra): A multisystem, multiplatform emulator (chip-8, GB, NES, N64) from Paris. + + # Sister Projects - [**Panda3DS**](https://github.com/wheremyfoodat/Panda3DS): A multiplatform 3DS emulator from our co-author wheremyfoodat. diff --git a/src/emulator.cpp b/src/emulator.cpp index 9d1bb00d9..a469a31ce 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -195,7 +195,7 @@ void Emulator::Run(const std::filesystem::path& file) { } void Emulator::LoadSystemModules(const std::filesystem::path& file) { - constexpr std::array ModulesToLoad{ + constexpr std::array ModulesToLoad{ {{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2}, {"libSceFiber.sprx", nullptr}, {"libSceUlt.sprx", nullptr}, @@ -204,7 +204,8 @@ void Emulator::LoadSystemModules(const std::filesystem::path& file) { {"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal}, {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap}, {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc}, - {"libSceJpegEnc.sprx", nullptr}}, + {"libSceJpegEnc.sprx", nullptr}, + {"libSceFont.sprx", nullptr}}, }; std::vector found_modules; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 1d553dc56..a58b2778f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -152,4 +152,20 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicExchange); } +Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) { + auto& buffer = ctx.buffers[binding]; + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, + ctx.ConstU32(gds_addr)); + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics); +} + +Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) { + auto& buffer = ctx.buffers[binding]; + const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, + ctx.ConstU32(gds_addr)); + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 39a214fa0..64ce532b5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -133,10 +133,6 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { return ctx.OpLoad(buffer.data_types->Get(1), ptr); } -Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index) { - return ctx.OpBitcast(ctx.U32[1], EmitReadConstBuffer(ctx, handle, index)); -} - Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { return ctx.OpLoad( ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]), @@ -222,12 +218,8 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value)); } -Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { - return EmitLoadBufferF32(ctx, inst, handle, address); -} - template -static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) { +static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) { auto& buffer = ctx.buffers[handle]; address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); @@ -246,20 +238,20 @@ static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) { } } -Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { - return EmitLoadBufferF32xN<1>(ctx, handle, address); +Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { + return EmitLoadBufferU32xN<1>(ctx, handle, address); } -Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { - return EmitLoadBufferF32xN<2>(ctx, handle, address); +Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { + return EmitLoadBufferU32xN<2>(ctx, handle, address); } -Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { - return EmitLoadBufferF32xN<3>(ctx, handle, address); +Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { + return EmitLoadBufferU32xN<3>(ctx, handle, address); } -Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { - return EmitLoadBufferF32xN<4>(ctx, handle, address); +Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { + return EmitLoadBufferU32xN<4>(ctx, handle, address); } Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { @@ -275,7 +267,7 @@ Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addr } template -static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) { +static void EmitStoreBufferU32xN(EmitContext& ctx, u32 handle, Id address, Id value) { auto& buffer = ctx.buffers[handle]; address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); @@ -287,29 +279,25 @@ static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id va const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i)); const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index_i); - ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i)); + ctx.OpStore(ptr, ctx.OpCompositeExtract(buffer.data_types->Get(1), value, i)); } } } -void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferF32xN<1>(ctx, handle, address, value); -} - -void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferF32xN<2>(ctx, handle, address, value); -} - -void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferF32xN<3>(ctx, handle, address, value); -} - -void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferF32xN<4>(ctx, handle, address, value); -} - void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferF32xN<1>(ctx, handle, address, value); + EmitStoreBufferU32xN<1>(ctx, handle, address, value); +} + +void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferU32xN<2>(ctx, handle, address, value); +} + +void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferU32xN<3>(ctx, handle, address, value); +} + +void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferU32xN<4>(ctx, handle, address, value); } void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 0cd59175d..e506ced3a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -64,25 +64,16 @@ void EmitGetGotoVariable(EmitContext& ctx); void EmitSetScc(EmitContext& ctx); Id EmitReadConst(EmitContext& ctx); Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index); -Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index); -Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); -void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); -void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); @@ -406,12 +397,13 @@ Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value); - Id EmitLaneId(EmitContext& ctx); Id EmitWarpId(EmitContext& ctx); Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index); Id EmitReadFirstLane(EmitContext& ctx, Id value); Id EmitReadLane(EmitContext& ctx, Id value, u32 lane); Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane); +Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding); +Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index c0f0fa274..d01c1977a 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -43,6 +43,10 @@ void Translator::EmitDataShare(const GcnInst& inst) { return DS_MIN_U32(inst, false, true); case Opcode::DS_MAX_RTN_U32: return DS_MAX_U32(inst, false, true); + case Opcode::DS_APPEND: + return DS_APPEND(inst); + case Opcode::DS_CONSUME: + return DS_CONSUME(inst); default: LogMissingOpcode(inst); } @@ -192,4 +196,18 @@ void Translator::V_WRITELANE_B32(const GcnInst& inst) { ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane)); } +void Translator::DS_APPEND(const GcnInst& inst) { + const u32 inst_offset = inst.control.ds.offset0; + const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset)); + const IR::U32 prev = ir.DataAppend(gds_offset); + SetDst(inst.dst[0], prev); +} + +void Translator::DS_CONSUME(const GcnInst& inst) { + const u32 inst_offset = inst.control.ds.offset0; + const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset)); + const IR::U32 prev = ir.DataConsume(gds_offset); + SetDst(inst.dst[0], prev); +} + } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index d4db09a64..18e830f7b 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -31,6 +31,12 @@ void Translator::EmitExport(const GcnInst& inst) { case MrtSwizzle::Alt: static constexpr std::array AltSwizzle = {2, 1, 0, 3}; return AltSwizzle[comp]; + case MrtSwizzle::Reverse: + static constexpr std::array RevSwizzle = {3, 2, 1, 0}; + return RevSwizzle[comp]; + case MrtSwizzle::ReverseAlt: + static constexpr std::array AltRevSwizzle = {3, 0, 1, 2}; + return AltRevSwizzle[comp]; default: UNREACHABLE(); } diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index af258cd19..adc127f12 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -73,9 +73,13 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { case Opcode::S_SUB_I32: return S_SUB_U32(inst); case Opcode::S_MIN_U32: - return S_MIN_U32(inst); + return S_MIN_U32(false, inst); + case Opcode::S_MIN_I32: + return S_MIN_U32(true, inst); case Opcode::S_MAX_U32: - return S_MAX_U32(inst); + return S_MAX_U32(false, inst); + case Opcode::S_MAX_I32: + return S_MAX_U32(true, inst); case Opcode::S_WQM_B64: break; default: @@ -533,18 +537,18 @@ void Translator::S_ADDC_U32(const GcnInst& inst) { SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry)); } -void Translator::S_MAX_U32(const GcnInst& inst) { +void Translator::S_MAX_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 result = ir.UMax(src0, src1); + const IR::U32 result = ir.IMax(src0, src1, is_signed); SetDst(inst.dst[0], result); ir.SetScc(ir.IEqual(result, src0)); } -void Translator::S_MIN_U32(const GcnInst& inst) { +void Translator::S_MIN_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 result = ir.UMin(src0, src1); + const IR::U32 result = ir.IMin(src0, src1, is_signed); SetDst(inst.dst[0], result); ir.SetScc(ir.IEqual(result, src0)); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index d6887818d..e4be298ea 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -101,8 +101,8 @@ public: void S_ADDC_U32(const GcnInst& inst); void S_MULK_I32(const GcnInst& inst); void S_ADDK_I32(const GcnInst& inst); - void S_MAX_U32(const GcnInst& inst); - void S_MIN_U32(const GcnInst& inst); + void S_MAX_U32(bool is_signed, const GcnInst& inst); + void S_MIN_U32(bool is_signed, const GcnInst& inst); void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst); // Scalar Memory @@ -173,7 +173,7 @@ public: void V_BCNT_U32_B32(const GcnInst& inst); void V_COS_F32(const GcnInst& inst); void V_MAX3_F32(const GcnInst& inst); - void V_MAX3_U32(const GcnInst& inst); + void V_MAX3_U32(bool is_signed, const GcnInst& inst); void V_CVT_I32_F32(const GcnInst& inst); void V_MIN_I32(const GcnInst& inst); void V_MUL_LO_U32(const GcnInst& inst); @@ -217,6 +217,8 @@ public: void V_READFIRSTLANE_B32(const GcnInst& inst); void V_READLANE_B32(const GcnInst& inst); void V_WRITELANE_B32(const GcnInst& inst); + void DS_APPEND(const GcnInst& inst); + void DS_CONSUME(const GcnInst& inst); void S_BARRIER(); // MIMG diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 2024b7067..b4470ee39 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -227,7 +227,9 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { case Opcode::V_MAX3_F32: return V_MAX3_F32(inst); case Opcode::V_MAX3_U32: - return V_MAX3_U32(inst); + return V_MAX3_U32(false, inst); + case Opcode::V_MAX3_I32: + return V_MAX_U32(true, inst); case Opcode::V_TRUNC_F32: return V_TRUNC_F32(inst); case Opcode::V_CEIL_F32: @@ -831,11 +833,11 @@ void Translator::V_MAX3_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2))); } -void Translator::V_MAX3_U32(const GcnInst& inst) { +void Translator::V_MAX3_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src2{GetSrc(inst.src[2])}; - SetDst(inst.dst[0], ir.UMax(src0, ir.UMax(src1, src2))); + SetDst(inst.dst[0], ir.IMax(src0, ir.IMax(src1, src2, is_signed), is_signed)); } void Translator::V_CVT_I32_F32(const GcnInst& inst) { @@ -967,14 +969,29 @@ void Translator::V_FFBL_B32(const GcnInst& inst) { } void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { - const IR::U32 src0{GetSrc(inst.src[0])}; - const IR::U32 src1{GetSrc(inst.src[1])}; if (!is_low) { - ASSERT(src0.IsImmediate() && src0.U32() == ~0U && src1.IsImmediate() && src1.U32() == 0U); - return; + // v_mbcnt_hi_u32_b32 v2, -1, 0 + if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 && + inst.src[1].field == OperandField::ConstZero) { + return; + } + // v_mbcnt_hi_u32_b32 vX, exec_hi, 0 + if (inst.src[0].field == OperandField::ExecHi && + inst.src[1].field == OperandField::ConstZero) { + return; + } + } else { + // v_mbcnt_lo_u32_b32 v2, -1, vX + // used combined with above to fetch lane id in non-compute stages + if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) { + SetDst(inst.dst[0], ir.LaneId()); + } + // v_mbcnt_lo_u32_b32 v20, exec_lo, vX + // used combined in above for append buffer indexing. + if (inst.src[0].field == OperandField::ExecLo) { + SetDst(inst.dst[0], ir.Imm32(0)); + } } - ASSERT(src0.IsImmediate() && src0.U32() == ~0U); - SetDst(inst.dst[0], ir.LaneId()); } void Translator::V_BFM_B32(const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 04b9b50dd..5af283364 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -147,10 +147,6 @@ void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) { void Translator::IMAGE_SAMPLE(const GcnInst& inst) { const auto& mimg = inst.control.mimg; - if (mimg.da) { - LOG_WARNING(Render_Vulkan, "Image instruction declares an array"); - } - IR::VectorReg addr_reg{inst.src[0].code}; IR::VectorReg dest_reg{inst.dst[0].code}; const IR::ScalarReg tsharp_reg{inst.src[2].code * 4}; @@ -388,11 +384,11 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info); const IR::VectorReg dst_reg{inst.src[1].code}; if (num_dwords == 1) { - ir.SetVectorReg(dst_reg, IR::F32{value}); + ir.SetVectorReg(dst_reg, IR::U32{value}); return; } for (u32 i = 0; i < num_dwords; i++) { - ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)}); + ir.SetVectorReg(dst_reg + i, IR::U32{ir.CompositeExtract(value, i)}); } } @@ -456,21 +452,18 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst const IR::VectorReg src_reg{inst.src[1].code}; switch (num_dwords) { case 1: - value = ir.GetVectorReg(src_reg); + value = ir.GetVectorReg(src_reg); break; case 2: - value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), - ir.GetVectorReg(src_reg + 1)); + value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1)); break; case 3: - value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), - ir.GetVectorReg(src_reg + 1), - ir.GetVectorReg(src_reg + 2)); + value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1), + ir.GetVectorReg(src_reg + 2)); break; case 4: - value = ir.CompositeConstruct( - ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1), - ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3)); + value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1), + ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3)); break; } const IR::Value handle = @@ -518,6 +511,15 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { const IR::VectorReg vaddr{inst.src[0].code}; const IR::VectorReg vdata{inst.src[1].code}; const IR::ScalarReg srsrc{inst.src[2].code * 4}; + const IR::Value address = [&] -> IR::Value { + if (mubuf.idxen && mubuf.offen) { + return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1)); + } + if (mubuf.idxen || mubuf.offen) { + return ir.GetVectorReg(vaddr); + } + return {}; + }(); const IR::U32 soffset{GetSrc(inst.src[3])}; ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); @@ -527,7 +529,6 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { info.offset_enable.Assign(mubuf.offen); IR::Value vdata_val = ir.GetVectorReg(vdata); - const IR::U32 address = ir.GetVectorReg(vaddr); const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(srsrc), ir.GetScalarReg(srsrc + 1), ir.GetScalarReg(srsrc + 2), ir.GetScalarReg(srsrc + 3)); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index cdc17304c..0184a7f63 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -37,12 +37,13 @@ struct BufferResource { u32 dword_offset; IR::Type used_types; AmdGpu::Buffer inline_cbuf; + bool is_gds_buffer{}; bool is_instance_data{}; bool is_written{}; bool IsStorage(AmdGpu::Buffer buffer) const noexcept { static constexpr size_t MaxUboSize = 65536; - return buffer.GetSize() > MaxUboSize || is_written; + return buffer.GetSize() > MaxUboSize || is_written || is_gds_buffer; } constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h index 1eb11469c..11ae969bc 100644 --- a/src/shader_recompiler/ir/basic_block.h +++ b/src/shader_recompiler/ir/basic_block.h @@ -147,6 +147,7 @@ public: /// Intrusively store the value of a register in the block. std::array ssa_sreg_values; + std::array ssa_sbit_values; std::array ssa_vreg_values; bool has_multiple_predecessors{false}; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 2be0c1ac6..7e52cfb5f 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -313,21 +313,21 @@ U32 IREmitter::ReadConst(const Value& base, const U32& offset) { return Inst(Opcode::ReadConst, base, offset); } -F32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) { - return Inst(Opcode::ReadConstBuffer, handle, index); +U32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) { + return Inst(Opcode::ReadConstBuffer, handle, index); } Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address, BufferInstInfo info) { switch (num_dwords) { case 1: - return Inst(Opcode::LoadBufferF32, Flags{info}, handle, address); + return Inst(Opcode::LoadBufferU32, Flags{info}, handle, address); case 2: - return Inst(Opcode::LoadBufferF32x2, Flags{info}, handle, address); + return Inst(Opcode::LoadBufferU32x2, Flags{info}, handle, address); case 3: - return Inst(Opcode::LoadBufferF32x3, Flags{info}, handle, address); + return Inst(Opcode::LoadBufferU32x3, Flags{info}, handle, address); case 4: - return Inst(Opcode::LoadBufferF32x4, Flags{info}, handle, address); + return Inst(Opcode::LoadBufferU32x4, Flags{info}, handle, address); default: UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords); } @@ -341,17 +341,16 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad const Value& data, BufferInstInfo info) { switch (num_dwords) { case 1: - Inst(data.Type() == Type::F32 ? Opcode::StoreBufferF32 : Opcode::StoreBufferU32, - Flags{info}, handle, address, data); + Inst(Opcode::StoreBufferU32, Flags{info}, handle, address, data); break; case 2: - Inst(Opcode::StoreBufferF32x2, Flags{info}, handle, address, data); + Inst(Opcode::StoreBufferU32x2, Flags{info}, handle, address, data); break; case 3: - Inst(Opcode::StoreBufferF32x3, Flags{info}, handle, address, data); + Inst(Opcode::StoreBufferU32x3, Flags{info}, handle, address, data); break; case 4: - Inst(Opcode::StoreBufferF32x4, Flags{info}, handle, address, data); + Inst(Opcode::StoreBufferU32x4, Flags{info}, handle, address, data); break; default: UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords); @@ -410,6 +409,14 @@ void IREmitter::StoreBufferFormat(const Value& handle, const Value& address, con Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data); } +U32 IREmitter::DataAppend(const U32& counter) { + return Inst(Opcode::DataAppend, counter, Imm32(0)); +} + +U32 IREmitter::DataConsume(const U32& counter) { + return Inst(Opcode::DataConsume, counter, Imm32(0)); +} + U32 IREmitter::LaneId() { return Inst(Opcode::LaneId); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 22d524fb3..01e71893c 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -90,7 +90,7 @@ public: [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed); [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); - [[nodiscard]] F32 ReadConstBuffer(const Value& handle, const U32& index); + [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index); [[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address, BufferInstInfo info); @@ -120,6 +120,8 @@ public: [[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address, const Value& value, BufferInstInfo info); + [[nodiscard]] U32 DataAppend(const U32& counter); + [[nodiscard]] U32 DataConsume(const U32& counter); [[nodiscard]] U32 LaneId(); [[nodiscard]] U32 WarpId(); [[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index d6ef49cf7..601c453d9 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -51,12 +51,11 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::Discard: case Opcode::DiscardCond: case Opcode::SetAttribute: - case Opcode::StoreBufferF32: - case Opcode::StoreBufferF32x2: - case Opcode::StoreBufferF32x3: - case Opcode::StoreBufferF32x4: - case Opcode::StoreBufferFormatF32: case Opcode::StoreBufferU32: + case Opcode::StoreBufferU32x2: + case Opcode::StoreBufferU32x3: + case Opcode::StoreBufferU32x4: + case Opcode::StoreBufferFormatF32: case Opcode::BufferAtomicIAdd32: case Opcode::BufferAtomicSMin32: case Opcode::BufferAtomicUMin32: @@ -68,6 +67,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::BufferAtomicOr32: case Opcode::BufferAtomicXor32: case Opcode::BufferAtomicSwap32: + case Opcode::DataAppend: + case Opcode::DataConsume: case Opcode::WriteSharedU128: case Opcode::WriteSharedU64: case Opcode::WriteSharedU32: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 4df8d13d1..4b922d55b 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -17,8 +17,7 @@ OPCODE(DiscardCond, Void, U1, // Constant memory operations OPCODE(ReadConst, U32, U32x2, U32, ) -OPCODE(ReadConstBuffer, F32, Opaque, U32, ) -OPCODE(ReadConstBufferU32, U32, Opaque, U32, ) +OPCODE(ReadConstBuffer, U32, Opaque, U32, ) // Barriers OPCODE(Barrier, Void, ) @@ -77,21 +76,19 @@ OPCODE(UndefU32, U32, OPCODE(UndefU64, U64, ) // Buffer operations -OPCODE(LoadBufferF32, F32, Opaque, Opaque, ) -OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, ) -OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, ) -OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, ) -OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, ) OPCODE(LoadBufferU32, U32, Opaque, Opaque, ) -OPCODE(StoreBufferF32, Void, Opaque, Opaque, F32, ) -OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, ) -OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, ) -OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, ) -OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, ) +OPCODE(LoadBufferU32x2, U32x2, Opaque, Opaque, ) +OPCODE(LoadBufferU32x3, U32x3, Opaque, Opaque, ) +OPCODE(LoadBufferU32x4, U32x4, Opaque, Opaque, ) +OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, ) OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) +OPCODE(StoreBufferU32x2, Void, Opaque, Opaque, U32x2, ) +OPCODE(StoreBufferU32x3, Void, Opaque, Opaque, U32x3, ) +OPCODE(StoreBufferU32x4, Void, Opaque, Opaque, U32x4, ) +OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, U32x4, ) // Buffer atomic operations -OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) +OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32 ) OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32 ) @@ -101,7 +98,7 @@ OPCODE(BufferAtomicDec32, U32, Opaq OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, ) -OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, ) // Vector utility OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) @@ -345,3 +342,5 @@ OPCODE(QuadShuffle, U32, U32, OPCODE(ReadFirstLane, U32, U32, ) OPCODE(ReadLane, U32, U32, U32 ) OPCODE(WriteLane, U32, U32, U32, U32 ) +OPCODE(DataAppend, U32, U32, U32 ) +OPCODE(DataConsume, U32, U32, U32 ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 025bb98c8..aa5d39ae8 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -3,7 +3,6 @@ #include #include -#include "common/alignment.h" #include "shader_recompiler/info.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/breadth_first_search.h" @@ -42,11 +41,10 @@ bool IsBufferAtomic(const IR::Inst& inst) { bool IsBufferStore(const IR::Inst& inst) { switch (inst.GetOpcode()) { - case IR::Opcode::StoreBufferF32: - case IR::Opcode::StoreBufferF32x2: - case IR::Opcode::StoreBufferF32x3: - case IR::Opcode::StoreBufferF32x4: case IR::Opcode::StoreBufferU32: + case IR::Opcode::StoreBufferU32x2: + case IR::Opcode::StoreBufferU32x3: + case IR::Opcode::StoreBufferU32x4: return true; default: return IsBufferAtomic(inst); @@ -55,25 +53,28 @@ bool IsBufferStore(const IR::Inst& inst) { bool IsBufferInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { - case IR::Opcode::LoadBufferF32: - case IR::Opcode::LoadBufferF32x2: - case IR::Opcode::LoadBufferF32x3: - case IR::Opcode::LoadBufferF32x4: case IR::Opcode::LoadBufferU32: + case IR::Opcode::LoadBufferU32x2: + case IR::Opcode::LoadBufferU32x3: + case IR::Opcode::LoadBufferU32x4: case IR::Opcode::ReadConstBuffer: - case IR::Opcode::ReadConstBufferU32: return true; default: return IsBufferStore(inst); } } +bool IsDataRingInstruction(const IR::Inst& inst) { + return inst.GetOpcode() == IR::Opcode::DataAppend || + inst.GetOpcode() == IR::Opcode::DataConsume; +} + bool IsTextureBufferInstruction(const IR::Inst& inst) { return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 || inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32; } -static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { +bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { switch (num_format) { case AmdGpu::NumberFormat::Float: switch (data_format) { @@ -98,19 +99,15 @@ static bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_for IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { switch (inst.GetOpcode()) { - case IR::Opcode::LoadBufferF32: - case IR::Opcode::LoadBufferF32x2: - case IR::Opcode::LoadBufferF32x3: - case IR::Opcode::LoadBufferF32x4: - case IR::Opcode::ReadConstBuffer: - case IR::Opcode::StoreBufferF32: - case IR::Opcode::StoreBufferF32x2: - case IR::Opcode::StoreBufferF32x3: - case IR::Opcode::StoreBufferF32x4: - return IR::Type::F32; case IR::Opcode::LoadBufferU32: - case IR::Opcode::ReadConstBufferU32: + case IR::Opcode::LoadBufferU32x2: + case IR::Opcode::LoadBufferU32x3: + case IR::Opcode::LoadBufferU32x4: case IR::Opcode::StoreBufferU32: + case IR::Opcode::StoreBufferU32x2: + case IR::Opcode::StoreBufferU32x3: + case IR::Opcode::StoreBufferU32x4: + case IR::Opcode::ReadConstBuffer: case IR::Opcode::BufferAtomicIAdd32: case IR::Opcode::BufferAtomicSwap32: return IR::Type::U32; @@ -191,6 +188,10 @@ public: u32 Add(const BufferResource& desc) { const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) { + // Only one GDS binding can exist. + if (desc.is_gds_buffer && existing.is_gds_buffer) { + return true; + } return desc.sgpr_base == existing.sgpr_base && desc.dword_offset == existing.dword_offset && desc.inline_cbuf == existing.inline_cbuf; @@ -399,8 +400,7 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); // Address of constant buffer reads can be calculated at IR emittion time. - if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer || - inst.GetOpcode() == IR::Opcode::ReadConstBufferU32) { + if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) { return; } @@ -609,6 +609,51 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } } +void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info, + Descriptors& descriptors) { + // Insert gds binding in the shader if it doesn't exist already. + // The buffer is used for append/consume counters. + constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1}; + const u32 binding = descriptors.Add(BufferResource{ + .used_types = IR::Type::U32, + .inline_cbuf = GdsSharp, + .is_gds_buffer = true, + .is_written = true, + }); + + const auto pred = [](const IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetUserData) { + return inst; + } + return std::nullopt; + }; + + // Attempt to deduce the GDS address of counter at compile time. + const u32 gds_addr = [&] { + const IR::Value& gds_offset = inst.Arg(0); + if (gds_offset.IsImmediate()) { + // Nothing to do, offset is known. + return gds_offset.U32() & 0xFFFF; + } + const auto result = IR::BreadthFirstSearch(&inst, pred); + ASSERT_MSG(result, "Unable to track M0 source"); + + // M0 must be set by some user data register. + const IR::Inst* prod = gds_offset.InstRecursive(); + const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg()); + u32 m0_val = info.user_data[ud_reg] >> 16; + if (prod->GetOpcode() == IR::Opcode::IAdd32) { + m0_val += prod->Arg(1).U32(); + } + return m0_val & 0xFFFF; + }(); + + // Patch instruction. + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.SetArg(0, ir.Imm32(gds_addr >> 2)); + inst.SetArg(1, ir.Imm32(binding)); +} + void ResourceTrackingPass(IR::Program& program) { // Iterate resource instructions and patch them after finding the sharp. auto& info = program.info; @@ -625,6 +670,10 @@ void ResourceTrackingPass(IR::Program& program) { } if (IsImageInstruction(inst)) { PatchImageInstruction(*block, inst, info, descriptors); + continue; + } + if (IsDataRingInstruction(inst)) { + PatchDataRingInstruction(*block, inst, info, descriptors); } } } diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index ea27c64f7..54dce0355 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -44,8 +44,17 @@ struct GotoVariable : FlagTag { u32 index; }; -using Variant = std::variant; +struct ThreadBitScalar : FlagTag { + ThreadBitScalar() = default; + explicit ThreadBitScalar(IR::ScalarReg sgpr_) : sgpr{sgpr_} {} + + auto operator<=>(const ThreadBitScalar&) const noexcept = default; + + IR::ScalarReg sgpr; +}; + +using Variant = std::variant; using ValueMap = std::unordered_map; struct DefTable { @@ -70,6 +79,13 @@ struct DefTable { goto_vars[variable.index].insert_or_assign(block, value); } + const IR::Value& Def(IR::Block* block, ThreadBitScalar variable) { + return block->ssa_sreg_values[RegIndex(variable.sgpr)]; + } + void SetDef(IR::Block* block, ThreadBitScalar variable, const IR::Value& value) { + block->ssa_sreg_values[RegIndex(variable.sgpr)] = value; + } + const IR::Value& Def(IR::Block* block, SccFlagTag) { return scc_flag[block]; } @@ -173,7 +189,7 @@ public: } template - IR::Value ReadVariable(Type variable, IR::Block* root_block, bool is_thread_bit = false) { + IR::Value ReadVariable(Type variable, IR::Block* root_block) { boost::container::small_vector, 64> stack{ ReadState(nullptr), ReadState(root_block), @@ -201,7 +217,7 @@ public: } else if (!block->IsSsaSealed()) { // Incomplete CFG IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; - phi->SetFlags(is_thread_bit ? IR::Type::U1 : IR::TypeOf(UndefOpcode(variable))); + phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); incomplete_phis[block].insert_or_assign(variable, phi); stack.back().result = IR::Value{&*phi}; @@ -214,7 +230,7 @@ public: } else { // Break potential cycles with operandless phi IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; - phi->SetFlags(is_thread_bit ? IR::Type::U1 : IR::TypeOf(UndefOpcode(variable))); + phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); WriteVariable(variable, block, IR::Value{phi}); @@ -263,9 +279,7 @@ private: template IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { for (IR::Block* const imm_pred : block->ImmPredecessors()) { - const bool is_thread_bit = - std::is_same_v && phi.Flags() == IR::Type::U1; - phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred, is_thread_bit)); + phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); } return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); } @@ -313,7 +327,11 @@ private: void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { const IR::Opcode opcode{inst.GetOpcode()}; switch (opcode) { - case IR::Opcode::SetThreadBitScalarReg: + case IR::Opcode::SetThreadBitScalarReg: { + const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; + pass.WriteVariable(ThreadBitScalar{reg}, block, inst.Arg(1)); + break; + } case IR::Opcode::SetScalarRegister: { const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; pass.WriteVariable(reg, block, inst.Arg(1)); @@ -345,11 +363,15 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetM0: pass.WriteVariable(M0Tag{}, block, inst.Arg(0)); break; - case IR::Opcode::GetThreadBitScalarReg: + case IR::Opcode::GetThreadBitScalarReg: { + const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; + const IR::Value value = pass.ReadVariable(ThreadBitScalar{reg}, block); + inst.ReplaceUsesWith(value); + break; + } case IR::Opcode::GetScalarRegister: { const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; - const bool thread_bit = opcode == IR::Opcode::GetThreadBitScalarReg; - const IR::Value value = pass.ReadVariable(reg, block, thread_bit); + const IR::Value value = pass.ReadVariable(reg, block); inst.ReplaceUsesWith(value); break; } diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index cee30f755..cbc18aa43 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -465,6 +465,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); event_eos->SignalFence(); + if (event_eos->command == PM4CmdEventWriteEos::Command::GdsStore) { + ASSERT(event_eos->size == 1); + if (rasterizer) { + rasterizer->Finish(); + const u32 value = rasterizer->ReadDataFromGds(event_eos->gds_index); + *event_eos->Address() = value; + } + } break; } case PM4ItOpcode::EventWriteEop: { @@ -474,6 +482,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) { + rasterizer->InlineDataToGds(dma_data->dst_addr_lo, dma_data->data); + } break; } case PM4ItOpcode::WriteData: { diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index 58ade221b..fd7980c17 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -350,6 +350,17 @@ struct PM4CmdEventWriteEop { } }; +enum class DmaDataDst : u32 { + Memory = 0, + Gds = 1, +}; + +enum class DmaDataSrc : u32 { + Memory = 0, + Gds = 1, + Data = 2, +}; + struct PM4DmaData { PM4Type3Header header; union { @@ -357,11 +368,11 @@ struct PM4DmaData { BitField<12, 1, u32> src_atc; BitField<13, 2, u32> src_cache_policy; BitField<15, 1, u32> src_volatile; - BitField<20, 2, u32> dst_sel; + BitField<20, 2, DmaDataDst> dst_sel; BitField<24, 1, u32> dst_atc; BitField<25, 2, u32> dst_cache_policy; BitField<27, 1, u32> dst_volatile; - BitField<29, 2, u32> src_sel; + BitField<29, 2, DmaDataSrc> src_sel; BitField<31, 1, u32> cp_sync; }; union { @@ -502,13 +513,17 @@ struct PM4CmdEventWriteEos { } void SignalFence() const { - switch (command.Value()) { + const auto cmd = command.Value(); + switch (cmd) { case Command::SingalFence: { *Address() = DataDWord(); break; } + case Command::GdsStore: { + break; + } default: { - UNREACHABLE(); + UNREACHABLE_MSG("Unknown command {}", u32(cmd)); } } } diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 89032e990..86af05bf1 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -15,8 +15,9 @@ namespace VideoCore { static constexpr size_t NumVertexBuffers = 32; -static constexpr size_t StagingBufferSize = 512_MB; -static constexpr size_t UboStreamBufferSize = 64_MB; +static constexpr size_t GdsBufferSize = 64_KB; +static constexpr size_t StagingBufferSize = 1_GB; +static constexpr size_t UboStreamBufferSize = 128_MB; BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, @@ -25,7 +26,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s texture_cache{texture_cache_}, tracker{tracker_}, staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize}, stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, + gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, GdsBufferSize}, memory_tracker{&tracker} { + Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer"); + // Ensure the first slot is used for the null buffer void(slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1)); } @@ -232,6 +236,27 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { return regs.num_indices; } +void BufferCache::InlineDataToGds(u32 gds_offset, u32 value) { + ASSERT_MSG(gds_offset % 4 == 0, "GDS offset must be dword aligned"); + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + const vk::BufferMemoryBarrier2 buf_barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, + .buffer = gds_buffer.Handle(), + .offset = gds_offset, + .size = sizeof(u32), + }; + cmdbuf.pipelineBarrier2(vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &buf_barrier, + }); + cmdbuf.updateBuffer(gds_buffer.Handle(), gds_offset, sizeof(u32), &value); +} + std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written, bool is_texel_buffer) { static constexpr u64 StreamThreshold = CACHING_PAGESIZE; @@ -258,6 +283,7 @@ std::pair BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) if (buffer_id) { Buffer& buffer = slot_buffers[buffer_id]; if (buffer.IsInBounds(gpu_addr, size)) { + SynchronizeBuffer(buffer, gpu_addr, size, false); return {&buffer, buffer.Offset(gpu_addr)}; } } @@ -541,64 +567,48 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, } bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { - boost::container::small_vector image_ids; - const u32 inv_size = std::min(size, MaxInvalidateDist); - texture_cache.ForEachImageInRegion(device_addr, inv_size, [&](ImageId image_id, Image& image) { - // Only consider GPU modified images, i.e render targets or storage images. - // Also avoid any CPU modified images as the image data is likely to be stale. - if (True(image.flags & ImageFlagBits::CpuModified) || - False(image.flags & ImageFlagBits::GpuModified)) { - return; - } - // Image must fully overlap with the provided buffer range. - if (image.cpu_addr < device_addr || image.cpu_addr_end > device_addr + size) { - return; - } - image_ids.push_back(image_id); - }); - if (image_ids.empty()) { + static constexpr FindFlags find_flags = + FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize; + ImageInfo info{}; + info.guest_address = device_addr; + info.guest_size_bytes = size; + const ImageId image_id = texture_cache.FindImage(info, find_flags); + if (!image_id) { return false; } - // Sort images by modification tick. If there are overlaps we want to - // copy from least to most recently modified. - std::ranges::sort(image_ids, [&](ImageId lhs_id, ImageId rhs_id) { - const Image& lhs = texture_cache.GetImage(lhs_id); - const Image& rhs = texture_cache.GetImage(rhs_id); - return lhs.tick_accessed_last < rhs.tick_accessed_last; - }); - boost::container::small_vector copies; - for (const ImageId image_id : image_ids) { - copies.clear(); - Image& image = texture_cache.GetImage(image_id); - u32 offset = buffer.Offset(image.cpu_addr); - const u32 num_layers = image.info.resources.layers; - for (u32 m = 0; m < image.info.resources.levels; m++) { - const u32 width = std::max(image.info.size.width >> m, 1u); - const u32 height = std::max(image.info.size.height >> m, 1u); - const u32 depth = - image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; - const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; - copies.push_back({ - .bufferOffset = offset, - .bufferRowLength = static_cast(mip_pitch), - .bufferImageHeight = static_cast(mip_height), - .imageSubresource{ - .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, - .mipLevel = m, - .baseArrayLayer = 0, - .layerCount = num_layers, - }, - .imageOffset = {0, 0, 0}, - .imageExtent = {width, height, depth}, - }); - offset += mip_ofs * num_layers; - } - scheduler.EndRendering(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, - copies); + Image& image = texture_cache.GetImage(image_id); + if (image.info.guest_size_bytes > size) { + return false; } + boost::container::small_vector copies; + u32 offset = buffer.Offset(image.cpu_addr); + const u32 num_layers = image.info.resources.layers; + for (u32 m = 0; m < image.info.resources.levels; m++) { + const u32 width = std::max(image.info.size.width >> m, 1u); + const u32 height = std::max(image.info.size.height >> m, 1u); + const u32 depth = + image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u; + const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m]; + copies.push_back({ + .bufferOffset = offset, + .bufferRowLength = static_cast(mip_pitch), + .bufferImageHeight = static_cast(mip_height), + .imageSubresource{ + .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, + .mipLevel = m, + .baseArrayLayer = 0, + .layerCount = num_layers, + }, + .imageOffset = {0, 0, 0}, + .imageExtent = {width, height, depth}, + }); + offset += mip_ofs * num_layers; + } + scheduler.EndRendering(); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits::eTransferRead); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, + copies); return true; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b38b00f07..cd6ea28fc 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -57,6 +57,11 @@ public: PageManager& tracker); ~BufferCache(); + /// Returns a pointer to GDS device local buffer. + [[nodiscard]] const Buffer* GetGdsBuffer() const noexcept { + return &gds_buffer; + } + /// Invalidates any buffer in the logical page range. void InvalidateMemory(VAddr device_addr, u64 size); @@ -66,6 +71,9 @@ public: /// Bind host index buffer for the current draw. u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); + /// Writes a value to GDS buffer. + void InlineDataToGds(u32 gds_offset, u32 value); + /// Obtains a buffer for the specified region. [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written, bool is_texel_buffer = false); @@ -130,6 +138,7 @@ private: PageManager& tracker; StreamBuffer staging_buffer; StreamBuffer stream_buffer; + Buffer gds_buffer; std::mutex mutex; Common::SlotVector slot_buffers; MemoryTracker memory_tracker; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 40a1124a6..430fb9ed7 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -585,11 +585,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu vk::Format AdjustColorBufferFormat(vk::Format base_format, Liverpool::ColorBuffer::SwapMode comp_swap, bool is_vo_surface) { - ASSERT_MSG(comp_swap == Liverpool::ColorBuffer::SwapMode::Standard || - comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate, - "Unsupported component swap mode {}", static_cast(comp_swap)); - const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate; + const bool comp_swap_reverse = comp_swap == Liverpool::ColorBuffer::SwapMode::StandardReverse; + const bool comp_swap_alt_reverse = + comp_swap == Liverpool::ColorBuffer::SwapMode::AlternateReverse; if (comp_swap_alt) { switch (base_format) { case vk::Format::eR8G8B8A8Unorm: @@ -605,6 +604,18 @@ vk::Format AdjustColorBufferFormat(vk::Format base_format, default: break; } + } else if (comp_swap_reverse) { + switch (base_format) { + case vk::Format::eR8G8B8A8Unorm: + return vk::Format::eA8B8G8R8UnormPack32; + case vk::Format::eR8G8B8A8Srgb: + return is_vo_surface ? vk::Format::eA8B8G8R8UnormPack32 + : vk::Format::eA8B8G8R8SrgbPack32; + default: + break; + } + } else if (comp_swap_alt_reverse) { + return base_format; } else { if (is_vo_surface && base_format == vk::Format::eR8G8B8A8Srgb) { return vk::Format::eR8G8B8A8Unorm; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index b87d3c915..aeae08138 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -109,37 +109,42 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, u32 binding{}; for (const auto& desc : info->buffers) { - const auto vsharp = desc.GetSharp(*info); - const bool is_storage = desc.IsStorage(vsharp); - const VAddr address = vsharp.base_address; - // Most of the time when a metadata is updated with a shader it gets cleared. It means we - // can skip the whole dispatch and update the tracked state instead. Also, it is not - // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we will - // need its full emulation anyways. For cases of metadata read a warning will be logged. - if (desc.is_written) { - if (texture_cache.TouchMeta(address, true)) { - LOG_TRACE(Render_Vulkan, "Metadata update skipped"); - return false; - } + bool is_storage = true; + if (desc.is_gds_buffer) { + auto* vk_buffer = buffer_cache.GetGdsBuffer(); + buffer_infos.emplace_back(vk_buffer->Handle(), 0, vk_buffer->SizeBytes()); } else { - if (texture_cache.IsMeta(address)) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); + const auto vsharp = desc.GetSharp(*info); + is_storage = desc.IsStorage(vsharp); + const VAddr address = vsharp.base_address; + // Most of the time when a metadata is updated with a shader it gets cleared. It means + // we can skip the whole dispatch and update the tracked state instead. Also, it is not + // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we + // will need its full emulation anyways. For cases of metadata read a warning will be + // logged. + if (desc.is_written) { + if (texture_cache.TouchMeta(address, true)) { + LOG_TRACE(Render_Vulkan, "Metadata update skipped"); + return false; + } + } else { + if (texture_cache.IsMeta(address)) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); + } } + const u32 size = vsharp.GetSize(); + const u32 alignment = + is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); + const auto [vk_buffer, offset] = + buffer_cache.ObtainBuffer(address, size, desc.is_written); + const u32 offset_aligned = Common::AlignDown(offset, alignment); + const u32 adjust = offset - offset_aligned; + if (adjust != 0) { + ASSERT(adjust % 4 == 0); + push_data.AddOffset(binding, adjust); + } + buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust); } - const u32 size = vsharp.GetSize(); - if (desc.is_written) { - texture_cache.InvalidateMemory(address, size); - } - const u32 alignment = - is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); - const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(address, size, desc.is_written); - const u32 offset_aligned = Common::AlignDown(offset, alignment); - const u32 adjust = offset - offset_aligned; - if (adjust != 0) { - ASSERT(adjust % 4 == 0); - push_data.AddOffset(binding, adjust); - } - buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust); set_writes.push_back({ .dstSet = VK_NULL_HANDLE, .dstBinding = binding++, @@ -188,7 +193,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, buffer_barriers.emplace_back(*barrier); } if (desc.is_written) { - texture_cache.InvalidateMemory(address, size); + texture_cache.MarkWritten(address, size); } } set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6ac4dcf14..a548b70a4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -432,7 +432,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, buffer_barriers.emplace_back(*barrier); } if (desc.is_written) { - texture_cache.InvalidateMemory(address, size); + texture_cache.MarkWritten(address, size); } } set_writes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4419b0f81..b4b256bb0 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -298,6 +298,16 @@ bool PipelineCache::RefreshGraphicsKey() { return false; } + static bool TessMissingLogged = false; + if (auto* pgm = regs.ProgramForStage(3); + regs.stage_enable.IsStageEnabled(3) && pgm->Address() != 0) { + if (!TessMissingLogged) { + LOG_WARNING(Render_Vulkan, "Tess pipeline compilation skipped"); + TessMissingLogged = true; + } + return false; + } + std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding); } return true; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9f72d0448..6344315a5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -175,6 +175,10 @@ u64 Rasterizer::Flush() { return current_tick; } +void Rasterizer::Finish() { + scheduler.Finish(); +} + void Rasterizer::BeginRendering() { const auto& regs = liverpool->regs; RenderState state; @@ -251,6 +255,17 @@ void Rasterizer::BeginRendering() { scheduler.BeginRendering(state); } +void Rasterizer::InlineDataToGds(u32 gds_offset, u32 value) { + buffer_cache.InlineDataToGds(gds_offset, value); +} + +u32 Rasterizer::ReadDataFromGds(u32 gds_offset) { + auto* gds_buf = buffer_cache.GetGdsBuffer(); + u32 value; + std::memcpy(&value, gds_buf->mapped_data.data() + gds_offset, sizeof(u32)); + return value; +} + void Rasterizer::InvalidateMemory(VAddr addr, u64 size) { buffer_cache.InvalidateMemory(addr, size); texture_cache.InvalidateMemory(addr, size); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 43ab4756d..5aa90c5cc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -41,12 +41,15 @@ public: void ScopeMarkerEnd(); void ScopedMarkerInsert(const std::string_view& str); + void InlineDataToGds(u32 gds_offset, u32 value); + u32 ReadDataFromGds(u32 gsd_offset); void InvalidateMemory(VAddr addr, u64 size); void MapMemory(VAddr addr, u64 size); void UnmapMemory(VAddr addr, u64 size); void CpSync(); u64 Flush(); + void Finish(); private: void BeginRendering(); diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index f932b25a0..1bbb975ba 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -32,7 +32,6 @@ enum ImageFlagBits : u32 { Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered - Deleted = 1 << 9, ///< Indicates that images was marked for deletion once frame is done }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 66fde5c83..7d87fb666 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -205,7 +205,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); // Override format if image is forced to be a depth target if (force_depth || tiling_mode == AmdGpu::TilingMode::Depth_MacroTiled) { - if (pixel_format == vk::Format::eR32Sfloat) { + if (pixel_format == vk::Format::eR32Sfloat || pixel_format == vk::Format::eR8Unorm) { pixel_format = vk::Format::eD32SfloatS8Uint; } else if (pixel_format == vk::Format::eR16Unorm) { pixel_format = vk::Format::eD16UnormS8Uint; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index e554bad7e..bb2d90530 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -128,6 +128,10 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info format = image.info.pixel_format; aspect = vk::ImageAspectFlagBits::eDepth; } + if (image.aspect_mask & vk::ImageAspectFlagBits::eStencil && format == vk::Format::eR8Unorm) { + format = image.info.pixel_format; + aspect = vk::ImageAspectFlagBits::eStencil; + } const vk::ImageViewCreateInfo image_view_ci = { .pNext = usage_override ? &usage_ci : nullptr, diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 996fcad04..37bb5da14 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -40,17 +40,27 @@ TextureCache::~TextureCache() = default; void TextureCache::InvalidateMemory(VAddr address, size_t size) { std::scoped_lock lock{mutex}; ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { - const size_t image_dist = - image.cpu_addr > address ? image.cpu_addr - address : address - image.cpu_addr; - if (image_dist < MaxInvalidateDist) { - // Ensure image is reuploaded when accessed again. - image.flags |= ImageFlagBits::CpuModified; - } + // Ensure image is reuploaded when accessed again. + image.flags |= ImageFlagBits::CpuModified; // Untrack image, so the range is unprotected and the guest can write freely. UntrackImage(image_id); }); } +void TextureCache::MarkWritten(VAddr address, size_t max_size) { + static constexpr FindFlags find_flags = + FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize; + ImageInfo info{}; + info.guest_address = address; + info.guest_size_bytes = max_size; + const ImageId image_id = FindImage(info, find_flags); + if (!image_id) { + return; + } + // Ensure image is copied when accessed again. + slot_images[image_id].flags |= ImageFlagBits::CpuModified; +} + void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { std::scoped_lock lk{mutex}; @@ -199,10 +209,14 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) { !IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format)) { continue; } - ASSERT(cache_image.info.type == info.type); + ASSERT(cache_image.info.type == info.type || True(flags & FindFlags::RelaxFmt)); image_id = cache_id; } + if (True(flags & FindFlags::NoCreate) && !image_id) { + return {}; + } + // Try to resolve overlaps (if any) if (!image_id) { for (const auto& cache_id : image_ids) { @@ -211,10 +225,6 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) { } } - if (True(flags & FindFlags::NoCreate) && !image_id) { - return {}; - } - // Create and register a new image if (!image_id) { image_id = slot_images.insert(instance, scheduler, info); @@ -251,9 +261,6 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo ImageView& TextureCache::FindTexture(const ImageInfo& info, const ImageViewInfo& view_info) { const ImageId image_id = FindImage(info); Image& image = slot_images[image_id]; - if (view_info.is_storage) { - image.flags |= ImageFlagBits::GpuModified; - } UpdateImage(image_id); auto& usage = image.info.usage; @@ -351,7 +358,6 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule if (False(image.flags & ImageFlagBits::CpuModified)) { return; } - // Mark image as validated. image.flags &= ~ImageFlagBits::CpuModified; @@ -485,8 +491,6 @@ void TextureCache::DeleteImage(ImageId image_id) { ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); - image.flags |= ImageFlagBits::Deleted; - // Remove any registered meta areas. const auto& meta_info = image.info.meta_info; if (meta_info.cmask_addr) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 44bc2b431..cc19ac4a8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -50,6 +50,9 @@ public: /// Invalidates any image in the logical page range. void InvalidateMemory(VAddr address, size_t size); + /// Marks an image as dirty if it exists at the provided address. + void MarkWritten(VAddr address, size_t max_size); + /// Evicts any images that overlap the unmapped range. void UnmapMemory(VAddr cpu_addr, size_t size); From 16363ac692e9d5c01b079c7d9f7cc15c6c8a04af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C2=A5IGA?= <164882787+Xphalnos@users.noreply.github.com> Date: Fri, 6 Sep 2024 23:26:56 +0200 Subject: [PATCH 07/11] Adding Refresh icon to toolbar (#816) --- src/images/play_icon.png | Bin 2470 -> 1150 bytes src/qt_gui/main_window.cpp | 3 +++ src/qt_gui/main_window_ui.h | 5 +++++ 3 files changed, 8 insertions(+) diff --git a/src/images/play_icon.png b/src/images/play_icon.png index c67831a1e47bae69506b469b6fd2c2ca9a5f1c83..2815be39d8594b213fad22e68c7e8ac16872d639 100644 GIT binary patch literal 1150 zcmV-^1cCdBP)hhkOu0@KGNW1f!&&u~0;WsPNL|!LFt-qchILs;B!!IHF$}y9 zJel{6l!0|qlu2?;z8oQMqkUjb{|d|!p7-9iE{ z0~aI3oB|e6ak3KPm8(rx5(B$OZmuDFXjIN=V*9Y8S}% z1Aj$`xdXfhJWh$QA|x=|FLX6p&rcdZ`XUVXnO%6S2?_M42>f3I>nS)^g#^|BU!(~9 z%P1(;h13TAA0niT`oRA*1)~T;>I46u5n}EZE$|mfNMM!^{38)!e)56eEv%WQh(ZF3 zec*4kurJ)#s~D!pLISIOWlU_~e*uG(K}cW__&&1g@+dGLLzGEKs84pW;#OqfcQI)O z1}K}5K%ale!x3VBwh;KsDkQMbzm{SG|5acMIZ<{YffrNE_)q7N>aU8BFyr3}d>5JO zKh((;%PI+}pYi_|A;zU)IzknN)CT^;5i|aG=m1p~5_rZJgO9hcAodg)BOwo2<+o2X zlkzSZBO#CSMw1${OJt0MG-i8(kg=>&`4ZBYX-)o1GTnG2A@M@oM?hvsNMm;Bm1T#W zO?HGSs2lt2b!n7L(JPCniDBmZR;nzdFEVxVRwpj1ib8r*Bo{yG)WuavNLPyVyQ?ui zsPudL(E^nbw1jH}6*(B8M`b{E7*wI04OXj^)CP-HY;A*C+Kym@UTxU1L6^3)*-4$O z(WXxuxvYL|i?)$moUO(IHnP3FYOP@-{obYKGyg~JnT(+ZX2wv9KVzr~rZLne));D} zYz(y;H-?(=8$-uc7-LVuOkD@M*`8Gw3q+HOUVEJ&aNp0(h z?-3Gn2-t*<&71n+D-&%Rzxs{`u3$|}fg@8Cp`%yZj5{jzObQV?oH#?eeUICuAYVVe z1YXMWg2aEU@JDhd9dQ4d^;WB8WhSu6Paa(_!q=>I@DxG_A%qY@2*JPn1NF_gp-pF8FWQhbW?9;ba!ELWdL_~cP?peYja~^aAhuUa%Y?FJQ@H12{TDV zK~#90?VVe!T~!r^zrTQTQ3O%Bc>z&qv9tjTf=F$nC_*t&f%qUL8U+;th$d8DEQy$+ zV5||7Vxr;lU_ddkK{q(Z{o8HlS7kz4wf{9Bz< z;6z}pq)$px1J^2SAx#Lp^Jf0O3-Ci=d37r^bx?Fk`_sNWsBGCHElm8Z3CPI z+%D;emapAQ2K5{~jdaVtSkmhTltV8V$NOmy&j+zDw-c`9M+=wph}uvL-j;428e|wx6_pI&cVZYanE6 z;51Sb_K~c~jW9zaaEZfTsc>djeNfP1qZ=CO3i%oxlLLpSQi3^gR5FVAwoh zwWQN4mq~#k6}ZOs{kD$+P5}NI2-ykv4zNPfJF+G>A`G>_HMTDY_9Hc68v-GRllGh! zN!m7RazoBY2n=BRiK+>k1FQ{%)aHUUl8(%o+b}aS0@v7H3Csi*0RIVu>;qgY=`u;L z&zjs2GI9a~*nZCTxxjSbx?tGxz&(;q?vulFBn7UqeJ^k%a1!wMK*-L(65s|&Q#bd| zU0|dI25{nHQZ+}-1+z&_*ut(mJV##O8rzRn=7PDvJ%NyIN#=sPB+bp5+yLo}z%{mS zAgxuO4LlnN*%!F3a(M2NGq*v|Ie`IeueZIBWG=WV5YkzPXD0=&vAr62H_2S^&p^m- zBy+))lJ?4&T5EJ#VEA7XR+|f=P1w4RC5+d_)K zo51#sq;>76!*g%on#x@8hOEg2DFSbX%3QFR^gR5_U|4M~7`Vf;2pq)reYQV9a(G4t zyVVZQ<&yT#n%pKR0&gMX9G({fkuibq0qz*o_^2ZA1hDcXLuHY zCxPu1jF}4}4$rrd9Gk17IB65G#I=7Ix&>jNRRxu6w?XA!s|Z2um3AIahQSRmxJ zB!}m(B<+_guOe`ROwi#uhcSm|z!|VTetgs)0wMMIs9Pi*)?_Y4;EYt`qoxDr z6B{3O0Pt%`=QNc=5jZoppR)aB(&U0$0wGg?uS)uO6W1>Sw}kE68Jk=X86R~*6W1*Q zcbvwK)hTQmNe7aaH)jPyE^FetrGhgfX$MK?k;XSh#N7aV!}gDxxPGbNj7T~fxB%EI z7_u5T*>>c0n-HafGazX;iz@@;Mwj*OFCPfi=5=fd& z+7XJ*`?!~MJB|*HnKb1VJpm-`ENKbp`!gc$bEKEurnf0BlnUNrBppwB^oh>gT1l$f z-{#BeZ#y~yIXmSRy%8ksBxy0Qf|$6^lU`PvIZWb0so<9&=~&?VKy-Ti zZKU<%$T0-1Q!2PZ(w>s80DewP+$Tv*)FHzvE|dyJ($>JKz}JE3`@PFZ-?fpWfCfsh z1+F&XmjKZ-m>vR7vmHImYvAiU$h;0GjL?+oWPRikg9gyV94)C z+j)_9dquGDgI$ zos6@KxQU^x;BlMq=Kwnd!dV~i^-SO4=vKn26<`ymK zHINNwcU`cyO`U%O&7!kMLgg>~5 z8)G09T#dQ^G;kgeJuYq;>AN;^?qH#!b%7<#BpKKa35GljEa=iEe4Dg(iyr4>yp@=^ zbIXZSk7Dr-=%Fd^3N44EPlA-Xd=MG$HV6(s$~{xCEGH`$x7T2RRlR zX3_%N-T-`z7mK_OIMVj~zS&fV4K-C*lD1-OZsR)JFEsMOg-Owvz=h#tXe>L0JQRUD kP7%1{6oETV5xC?07l%~%TYdMGPXGV_07*qoM6N<$f(uQ0mjD0& diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 93969100d..bd2f097ea 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -88,6 +88,7 @@ void MainWindow::AddUiWidgets() { ui->toolBar->addWidget(ui->playButton); ui->toolBar->addWidget(ui->pauseButton); ui->toolBar->addWidget(ui->stopButton); + ui->toolBar->addWidget(ui->refreshButton); ui->toolBar->addWidget(ui->settingsButton); ui->toolBar->addWidget(ui->controllerButton); QFrame* line = new QFrame(this); @@ -177,6 +178,7 @@ void MainWindow::CreateConnects() { connect(ui->mw_searchbar, &QLineEdit::textChanged, this, &MainWindow::SearchGameTable); connect(ui->exitAct, &QAction::triggered, this, &QWidget::close); connect(ui->refreshGameListAct, &QAction::triggered, this, &MainWindow::RefreshGameTable); + connect(ui->refreshButton, &QPushButton::clicked, this, &MainWindow::RefreshGameTable); connect(ui->showGameListAct, &QAction::triggered, this, &MainWindow::ShowGameList); connect(this, &MainWindow::ExtractionFinished, this, &MainWindow::RefreshGameTable); @@ -852,6 +854,7 @@ void MainWindow::SetUiIcons(bool isWhite) { ui->playButton->setIcon(RecolorIcon(ui->playButton->icon(), isWhite)); ui->pauseButton->setIcon(RecolorIcon(ui->pauseButton->icon(), isWhite)); ui->stopButton->setIcon(RecolorIcon(ui->stopButton->icon(), isWhite)); + ui->refreshButton->setIcon(RecolorIcon(ui->refreshButton->icon(), isWhite)); ui->settingsButton->setIcon(RecolorIcon(ui->settingsButton->icon(), isWhite)); ui->controllerButton->setIcon(RecolorIcon(ui->controllerButton->icon(), isWhite)); ui->refreshGameListAct->setIcon(RecolorIcon(ui->refreshGameListAct->icon(), isWhite)); diff --git a/src/qt_gui/main_window_ui.h b/src/qt_gui/main_window_ui.h index 0acfade0e..8ae5965f8 100644 --- a/src/qt_gui/main_window_ui.h +++ b/src/qt_gui/main_window_ui.h @@ -38,6 +38,7 @@ public: QPushButton* playButton; QPushButton* pauseButton; QPushButton* stopButton; + QPushButton* refreshButton; QPushButton* settingsButton; QPushButton* controllerButton; @@ -176,6 +177,10 @@ public: stopButton->setFlat(true); stopButton->setIcon(QIcon(":images/stop_icon.png")); stopButton->setIconSize(QSize(40, 40)); + refreshButton = new QPushButton(centralWidget); + refreshButton->setFlat(true); + refreshButton->setIcon(QIcon(":images/refresh_icon.png")); + refreshButton->setIconSize(QSize(32, 32)); settingsButton = new QPushButton(centralWidget); settingsButton->setFlat(true); settingsButton->setIcon(QIcon(":images/settings_icon.png")); From 81f7c830be8b4595ade5aede5de0c4dcf8b2c6c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pl=C3=ADnio=20Larrubia?= Date: Fri, 6 Sep 2024 19:01:55 -0300 Subject: [PATCH 08/11] ci: enable cmake cache on all platforms (SDL/Qt) (#622) - uses actions/cache@v4 and hendrikmuhs/ccache-action@v1.2.14 - Keeps the cache from CMake Cache configuration (Windows, Linux, macOS) - Keeps the cache from CMake build objects (Linux/macOS) - Use ccache for Linux builds - Use sccache for macOS builds - Add hashes to the s/ccache keys - Update cache names with OS-qt/sdl as a prefix - All old caches are invalidated, delete them or wait for cache eviction --- .github/workflows/linux-qt.yml | 23 +++++++++++++++++++++-- .github/workflows/linux.yml | 21 ++++++++++++++++++++- .github/workflows/macos-qt.yml | 23 ++++++++++++++++++++++- .github/workflows/macos.yml | 23 ++++++++++++++++++++++- .github/workflows/windows-qt.yml | 11 +++++++++++ .github/workflows/windows.yml | 11 +++++++++++ 6 files changed, 107 insertions(+), 5 deletions(-) diff --git a/.github/workflows/linux-qt.yml b/.github/workflows/linux-qt.yml index 06e048c0e..6848f203b 100644 --- a/.github/workflows/linux-qt.yml +++ b/.github/workflows/linux-qt.yml @@ -25,8 +25,27 @@ jobs: run: > sudo apt-get update && sudo apt install libx11-dev libxext-dev libwayland-dev libfuse2 clang build-essential qt6-base-dev qt6-tools-dev + - name: Cache CMake dependency source code + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-qt-cache-cmake-dependency-sources + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + + - name: Cache CMake dependency build objects + uses: hendrikmuhs/ccache-action@v1.2.14 + env: + cache-name: ${{ runner.os }}-qt-cache-cmake-dependency-builds + with: + append-timestamp: false + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_QT_GUI=ON -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel @@ -44,4 +63,4 @@ jobs: uses: actions/upload-artifact@v4 with: name: shadps4-linux-qt-${{ steps.vars.outputs.date }}-${{ steps.vars.outputs.shorthash }} - path: Shadps4-qt.AppImage \ No newline at end of file + path: Shadps4-qt.AppImage diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index ee1340984..d4402472a 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -25,8 +25,27 @@ jobs: run: > sudo apt-get update && sudo apt install libx11-dev libxext-dev libwayland-dev libfuse2 clang build-essential + - name: Cache CMake dependency source code + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-sdl-cache-cmake-dependency-sources + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + + - name: Cache CMake dependency build objects + uses: hendrikmuhs/ccache-action@v1.2.14 + env: + cache-name: ${{ runner.os }}-sdl-cache-cmake-dependency-builds + with: + append-timestamp: false + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel diff --git a/.github/workflows/macos-qt.yml b/.github/workflows/macos-qt.yml index f04d3091c..beb927a79 100644 --- a/.github/workflows/macos-qt.yml +++ b/.github/workflows/macos-qt.yml @@ -40,8 +40,29 @@ jobs: arch: clang_64 archives: qtbase qttools + - name: Cache CMake dependency source code + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-qt-cache-cmake-dependency-sources + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + + - name: Cache CMake dependency build objects + uses: hendrikmuhs/ccache-action@v1.2.14 + env: + cache-name: ${{runner.os}}-qt-cache-cmake-dependency-builds + with: + append-timestamp: false + create-symlink: true + key: ${{env.cache-name}}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + variant: sccache + - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DENABLE_QT_GUI=ON -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 0eb0ad17a..9526c6fd6 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -31,8 +31,29 @@ jobs: arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" arch -x86_64 /usr/local/bin/brew install molten-vk + - name: Cache CMake dependency source code + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-sdl-cache-cmake-dependency-sources + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + + - name: Cache CMake dependency build objects + uses: hendrikmuhs/ccache-action@v1.2.14 + env: + cache-name: ${{runner.os}}-sdl-cache-cmake-dependency-builds + with: + append-timestamp: false + create-symlink: true + key: ${{env.cache-name}}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + variant: sccache + - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(sysctl -n hw.ncpu) diff --git a/.github/workflows/windows-qt.yml b/.github/workflows/windows-qt.yml index 83b1a908b..fee202b5c 100644 --- a/.github/workflows/windows-qt.yml +++ b/.github/workflows/windows-qt.yml @@ -30,6 +30,17 @@ jobs: arch: win64_msvc2019_64 archives: qtbase qttools + - name: Cache CMake dependency source code + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-qt-cache-cmake-dependency-sources + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + - name: Configure CMake run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -T ClangCL -DENABLE_QT_GUI=ON diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 413277927..4bea63b16 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -20,6 +20,17 @@ jobs: with: submodules: recursive + - name: Cache CMake dependency source code + uses: actions/cache@v4 + env: + cache-name: ${{ runner.os }}-sdl-cache-cmake-dependency-sources + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- + - name: Configure CMake run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -T ClangCL From fda5689ddb1d2fb670ce8ceed5c1feb4b1dfc0b8 Mon Sep 17 00:00:00 2001 From: "Daniel R." <47796739+polybiusproxy@users.noreply.github.com> Date: Sat, 7 Sep 2024 11:38:00 +0200 Subject: [PATCH 09/11] core/libraries: reduce log pressure (#829) --- src/core/libraries/kernel/cpu_management.cpp | 2 +- src/core/libraries/kernel/event_flag/event_flag.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/libraries/kernel/cpu_management.cpp b/src/core/libraries/kernel/cpu_management.cpp index 93dc60bd0..3bf609dfe 100644 --- a/src/core/libraries/kernel/cpu_management.cpp +++ b/src/core/libraries/kernel/cpu_management.cpp @@ -8,7 +8,7 @@ namespace Libraries::Kernel { int PS4_SYSV_ABI sceKernelIsNeoMode() { - LOG_INFO(Kernel_Sce, "called"); + LOG_DEBUG(Kernel_Sce, "called"); return Config::isNeoMode(); } diff --git a/src/core/libraries/kernel/event_flag/event_flag.cpp b/src/core/libraries/kernel/event_flag/event_flag.cpp index ec5d6ded2..4d3925127 100644 --- a/src/core/libraries/kernel/event_flag/event_flag.cpp +++ b/src/core/libraries/kernel/event_flag/event_flag.cpp @@ -78,7 +78,7 @@ int PS4_SYSV_ABI sceKernelCloseEventFlag() { return ORBIS_OK; } int PS4_SYSV_ABI sceKernelClearEventFlag(OrbisKernelEventFlag ef, u64 bitPattern) { - LOG_INFO(Kernel_Event, "called"); + LOG_DEBUG(Kernel_Event, "called"); ef->Clear(bitPattern); return ORBIS_OK; } @@ -97,7 +97,7 @@ int PS4_SYSV_ABI sceKernelSetEventFlag(OrbisKernelEventFlag ef, u64 bitPattern) } int PS4_SYSV_ABI sceKernelPollEventFlag(OrbisKernelEventFlag ef, u64 bitPattern, u32 waitMode, u64* pResultPat) { - LOG_INFO(Kernel_Event, "called bitPattern = {:#x} waitMode = {:#x}", bitPattern, waitMode); + LOG_DEBUG(Kernel_Event, "called bitPattern = {:#x} waitMode = {:#x}", bitPattern, waitMode); if (ef == nullptr) { return ORBIS_KERNEL_ERROR_ESRCH; From 749fe92882ff2a03ae13e73c00fe9a1a8ac88f95 Mon Sep 17 00:00:00 2001 From: Blargle Date: Sat, 7 Sep 2024 13:52:48 +0100 Subject: [PATCH 10/11] Small fix for EmitQuadToTriangleListIndices (#831) --- src/video_core/renderer_vulkan/liverpool_to_vk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 430fb9ed7..a97c3dee9 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -660,8 +660,8 @@ void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) { *out_data++ = i; *out_data++ = i + 1; *out_data++ = i + 2; - *out_data++ = i + 2; *out_data++ = i; + *out_data++ = i + 2; *out_data++ = i + 3; } } From 047a115b3ea5c989407050465264050b94043a50 Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 8 Sep 2024 11:12:25 +0200 Subject: [PATCH 11/11] hot-fix: exclude tiling condition from promotion of textures to depth --- src/video_core/texture_cache/image_info.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 7d87fb666..0b0f4278d 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -204,7 +204,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, bool force_depth /*= false*/) n tiling_mode = image.GetTilingMode(); pixel_format = LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); // Override format if image is forced to be a depth target - if (force_depth || tiling_mode == AmdGpu::TilingMode::Depth_MacroTiled) { + if (force_depth) { if (pixel_format == vk::Format::eR32Sfloat || pixel_format == vk::Format::eR8Unorm) { pixel_format = vk::Format::eD32SfloatS8Uint; } else if (pixel_format == vk::Format::eR16Unorm) {