From 1b1ad53792e81e858e5f71f0a18a445f111f3b7e Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sat, 14 Sep 2024 15:53:53 +0300 Subject: [PATCH] work --- CMakeLists.txt | 4 +- src/.vscode/c_cpp_properties.json | 18 ++ src/.vscode/launch.json | 24 +++ src/.vscode/settings.json | 59 +++++++ src/core/libraries/ajm/ajm.cpp | 74 ++++++-- src/core/libraries/ajm/ajm.h | 17 +- src/core/libraries/ajm/ajm_instance.h | 70 ++++++++ src/core/libraries/ajm/ajm_mp3.cpp | 158 ++++++++++++------ src/core/libraries/ajm/ajm_mp3.h | 26 +-- src/core/libraries/avplayer/avplayer.cpp | 2 +- src/core/libraries/gnmdriver/gnmdriver.cpp | 28 +++- src/core/libraries/gnmdriver/gnmdriver.h | 2 +- src/core/libraries/kernel/libkernel.cpp | 1 + .../libraries/kernel/thread_management.cpp | 11 +- src/core/libraries/libs.h | 9 +- src/core/libraries/videoout/driver.cpp | 1 - .../spirv/emit_spirv_context_get_set.cpp | 2 +- .../backend/spirv/emit_spirv_image.cpp | 6 +- .../backend/spirv/spirv_emit_context.cpp | 5 +- .../backend/spirv/spirv_emit_context.h | 1 + .../frontend/control_flow_graph.cpp | 23 ++- .../frontend/translate/translate.h | 2 +- .../frontend/translate/vector_alu.cpp | 16 +- .../frontend/translate/vector_memory.cpp | 1 + .../ir/passes/resource_tracking_pass.cpp | 5 +- .../ir/passes/ssa_rewrite_pass.cpp | 4 +- src/video_core/amdgpu/liverpool.cpp | 14 +- src/video_core/buffer_cache/buffer_cache.cpp | 3 + src/video_core/page_manager.cpp | 2 +- .../renderer_vulkan/liverpool_to_vk.cpp | 2 + .../renderer_vulkan/vk_compute_pipeline.cpp | 8 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- .../renderer_vulkan/vk_instance.cpp | 2 + .../renderer_vulkan/vk_pipeline_cache.cpp | 10 +- src/video_core/texture_cache/image.cpp | 3 + src/video_core/texture_cache/image_view.cpp | 2 +- .../texture_cache/texture_cache.cpp | 2 +- src/video_core/texture_cache/tile_manager.cpp | 2 + 38 files changed, 487 insertions(+), 134 deletions(-) create mode 100644 src/.vscode/c_cpp_properties.json create mode 100644 src/.vscode/launch.json create mode 100644 src/.vscode/settings.json create mode 100644 src/core/libraries/ajm/ajm_instance.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 90496953a..f41545ccf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -667,6 +667,8 @@ if (ENABLE_QT_GUI) ${VIDEO_CORE} ${EMULATOR} src/images/shadPS4.icns + src/core/libraries/ajm/ajm_mp3.h src/core/libraries/ajm/ajm_mp3.cpp + src/core/libraries/ajm/ajm_instance.h ) else() add_executable(shadps4 @@ -688,7 +690,7 @@ endif() create_target_directory_groups(shadps4) -target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui) +target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui libatrac9) target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") diff --git a/src/.vscode/c_cpp_properties.json b/src/.vscode/c_cpp_properties.json new file mode 100644 index 000000000..c2098a2d0 --- /dev/null +++ b/src/.vscode/c_cpp_properties.json @@ -0,0 +1,18 @@ +{ + "configurations": [ + { + "name": "linux-gcc-x64", + "includePath": [ + "${workspaceFolder}/**" + ], + "compilerPath": "/usr/bin/gcc", + "cStandard": "${default}", + "cppStandard": "${default}", + "intelliSenseMode": "linux-gcc-x64", + "compilerArgs": [ + "" + ] + } + ], + "version": 4 +} \ No newline at end of file diff --git a/src/.vscode/launch.json b/src/.vscode/launch.json new file mode 100644 index 000000000..d0452262e --- /dev/null +++ b/src/.vscode/launch.json @@ -0,0 +1,24 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "C/C++ Runner: Debug Session", + "type": "cppdbg", + "request": "launch", + "args": [], + "stopAtEntry": false, + "externalConsole": false, + "cwd": "/home/turtle/Desktop/shadPS4/src", + "program": "/home/turtle/Desktop/shadPS4/src/build/Debug/outDebug", + "MIMode": "gdb", + "miDebuggerPath": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ] + } + ] +} \ No newline at end of file diff --git a/src/.vscode/settings.json b/src/.vscode/settings.json new file mode 100644 index 000000000..3e5eb956e --- /dev/null +++ b/src/.vscode/settings.json @@ -0,0 +1,59 @@ +{ + "C_Cpp_Runner.cCompilerPath": "gcc", + "C_Cpp_Runner.cppCompilerPath": "g++", + "C_Cpp_Runner.debuggerPath": "gdb", + "C_Cpp_Runner.cStandard": "", + "C_Cpp_Runner.cppStandard": "", + "C_Cpp_Runner.msvcBatchPath": "", + "C_Cpp_Runner.useMsvc": false, + "C_Cpp_Runner.warnings": [ + "-Wall", + "-Wextra", + "-Wpedantic", + "-Wshadow", + "-Wformat=2", + "-Wcast-align", + "-Wconversion", + "-Wsign-conversion", + "-Wnull-dereference" + ], + "C_Cpp_Runner.msvcWarnings": [ + "/W4", + "/permissive-", + "/w14242", + "/w14287", + "/w14296", + "/w14311", + "/w14826", + "/w44062", + "/w44242", + "/w14905", + "/w14906", + "/w14263", + "/w44265", + "/w14928" + ], + "C_Cpp_Runner.enableWarnings": true, + "C_Cpp_Runner.warningsAsError": false, + "C_Cpp_Runner.compilerArgs": [], + "C_Cpp_Runner.linkerArgs": [], + "C_Cpp_Runner.includePaths": [], + "C_Cpp_Runner.includeSearch": [ + "*", + "**/*" + ], + "C_Cpp_Runner.excludeSearch": [ + "**/build", + "**/build/**", + "**/.*", + "**/.*/**", + "**/.vscode", + "**/.vscode/**" + ], + "C_Cpp_Runner.useAddressSanitizer": false, + "C_Cpp_Runner.useUndefinedSanitizer": false, + "C_Cpp_Runner.useLeakSanitizer": false, + "C_Cpp_Runner.showCompilationTime": false, + "C_Cpp_Runner.useLinkTimeOptimization": false, + "C_Cpp_Runner.msvcSecureNoWarnings": false +} \ No newline at end of file diff --git a/src/core/libraries/ajm/ajm.cpp b/src/core/libraries/ajm/ajm.cpp index 45bfd0dac..5de28a801 100644 --- a/src/core/libraries/ajm/ajm.cpp +++ b/src/core/libraries/ajm/ajm.cpp @@ -8,6 +8,8 @@ #include "common/logging/log.h" #include "core/libraries/ajm/ajm.h" #include "core/libraries/ajm/ajm_error.h" +#include "core/libraries/ajm/ajm_instance.h" +#include "core/libraries/ajm/ajm_mp3.h" #include "core/libraries/error_codes.h" #include "core/libraries/libs.h" @@ -26,10 +28,6 @@ static constexpr u32 AJM_INSTANCE_STATISTICS = 0x80000; static constexpr u32 MaxInstances = 0x2fff; -struct AjmInstance { - AjmCodecType codec_type; -}; - struct AjmDevice { u32 max_prio; u32 min_prio; @@ -37,8 +35,7 @@ struct AjmDevice { u32 release_cursor{MaxInstances - 1}; std::array is_registered{}; std::array free_instances{}; - std::array instances{}; - MP3Decoder mp3dec; + std::array, MaxInstances> instances; bool IsRegistered(AjmCodecType type) const { return is_registered[static_cast(type)]; @@ -66,7 +63,7 @@ int PS4_SYSV_ABI sceAjmBatchErrorDump() { } void* PS4_SYSV_ABI sceAjmBatchJobControlBufferRa(AjmSingleJob* batch_pos, u32 instance, AjmFlags flags, - const u8* in_buffer, u32 in_size, u8* out_buffer, + u8* in_buffer, u32 in_size, u8* out_buffer, u32 out_size, const void* ret_addr) { LOG_INFO(Lib_Ajm, "called instance = {:#x}, flags = {:#x}, cmd = {}, in_size = {:#x}, out_size = {:#x}, ret_addr = {}", instance, flags.raw, magic_enum::enum_name(AjmJobControlFlags(flags.command)), @@ -124,7 +121,7 @@ void* PS4_SYSV_ABI sceAjmBatchJobRunSplitBufferRa(AjmMultiJob* batch_pos, u32 in const AjmBuffer* out_buffers, u64 num_out_buffers, void* sideband_output, u64 sideband_output_size, const void* ret_addr) { - LOG_INFO(Lib_Ajm, "called instance = {}, flags = {:#x}, cmd = {}, sideband_cmd = {} num_input_buffers = {}, num_output_buffers = {}, " + LOG_DEBUG(Lib_Ajm, "called instance = {}, flags = {:#x}, cmd = {}, sideband_cmd = {} num_input_buffers = {}, num_output_buffers = {}, " "ret_addr = {}", instance, flags.raw, magic_enum::enum_name(AjmJobRunFlags(flags.command)), magic_enum::enum_name(AjmJobSidebandFlags(flags.sideband)), num_in_buffers, num_out_buffers, fmt::ptr(ret_addr)); @@ -207,13 +204,18 @@ int PS4_SYSV_ABI sceAjmBatchStartBuffer(u32 context, const u8* batch, u32 batch_ std::memcpy(&header, batch_ptr, sizeof(u64)); const auto& opcode = header.opcode; + const u32 instance = opcode.instance; + const u8* job_ptr = batch_ptr + sizeof(AjmJobHeader) + opcode.is_debug * 16; + if (opcode.is_control) { ASSERT_MSG(!opcode.is_statistic, "Statistic instance is not handled"); const auto command = AjmJobControlFlags(opcode.command_flags); switch (command) { - case AjmJobControlFlags::Reset: + case AjmJobControlFlags::Reset: { LOG_INFO(Lib_Ajm, "Resetting instance {}", opcode.instance); + dev->instances[opcode.instance]->Reset(); break; + } case (AjmJobControlFlags::Initialize | AjmJobControlFlags::Reset): LOG_INFO(Lib_Ajm, "Initializing instance {}", opcode.instance); break; @@ -223,19 +225,55 @@ int PS4_SYSV_ABI sceAjmBatchStartBuffer(u32 context, const u8* batch, u32 batch_ default: break; } + + // Write sideband structures. + const AjmJobBuffer* out_buffer = reinterpret_cast(job_ptr + 24); + auto* result = reinterpret_cast(out_buffer->buffer); + result->result = 0; + result->internal_result = 0; } else { const auto command = AjmJobRunFlags(opcode.command_flags); const auto sideband = AjmJobSidebandFlags(opcode.sideband_flags); - const u8* job_ptr = batch_ptr + sizeof(AjmJobHeader) + opcode.is_debug * 16; const AjmJobBuffer* in_buffer = reinterpret_cast(job_ptr); + const AjmJobBuffer* out_buffer = reinterpret_cast(job_ptr + 24); + job_ptr += 24; + LOG_INFO(Lib_Ajm, "Decode job cmd = {}, sideband = {}, in_addr = {}, in_size = {}", magic_enum::enum_name(command), magic_enum::enum_name(sideband), fmt::ptr(in_buffer->buffer), in_buffer->buf_size); - dev->mp3dec.Decode(in_buffer->buffer, in_buffer->buf_size); + + // Decode as much of the input bitstream as possible. + auto* instance = dev->instances[opcode.instance].get(); + const auto [in_remain, out_remain, num_frames] = + instance->Decode(in_buffer->buffer, in_buffer->buf_size, + out_buffer->buffer, out_buffer->buf_size); + + // Write sideband structures. + auto* sideband_ptr = *reinterpret_cast(job_ptr + 8); + auto* result = reinterpret_cast(sideband_ptr); + result->result = 0; + result->internal_result = 0; + sideband_ptr += sizeof(AjmSidebandResult); + + // Check sideband flags + if (True(sideband & AjmJobSidebandFlags::Stream)) { + auto* stream = reinterpret_cast(sideband_ptr); + stream->input_consumed = in_buffer->buf_size - in_remain; + stream->output_written = out_buffer->buf_size - out_remain; + stream->total_decoded_samples = instance->decoded_samples; + sideband_ptr += sizeof(AjmSidebandStream); + } + if (True(command & AjmJobRunFlags::MultipleFrames)) { + auto* mframe = reinterpret_cast(sideband_ptr); + mframe->num_frames = num_frames; + sideband_ptr += sizeof(AjmSidebandMFrame); + } } batch_ptr += sizeof(AjmJobHeader) + header.job_size; } + static int batch_id = 0; + *out_batch_id = ++batch_id; return ORBIS_OK; } @@ -259,7 +297,7 @@ int PS4_SYSV_ABI sceAjmDecMp3ParseFrame(const u8* buf, u32 stream_size, int pars if ((buf[0] & SYNCWORDH) != SYNCWORDH || (buf[1] & SYNCWORDL) != SYNCWORDL) { return ORBIS_AJM_ERROR_INVALID_PARAMETER; } - return ParseMp3Header(buf, stream_size, parse_ofl, frame); + return AjmMp3Decoder::ParseMp3Header(buf, stream_size, parse_ofl, frame); } int PS4_SYSV_ABI sceAjmFinalize() { @@ -282,7 +320,8 @@ int PS4_SYSV_ABI sceAjmInstanceCodecType() { return ORBIS_OK; } -int PS4_SYSV_ABI sceAjmInstanceCreate(u32 context, AjmCodecType codec_type, AjmInstanceFlags flags, u32* instance) { +int PS4_SYSV_ABI sceAjmInstanceCreate(u32 context, AjmCodecType codec_type, AjmInstanceFlags flags, + u32* out_instance) { if (codec_type >= AjmCodecType::Max) { return ORBIS_AJM_ERROR_INVALID_PARAMETER; } @@ -297,8 +336,12 @@ int PS4_SYSV_ABI sceAjmInstanceCreate(u32 context, AjmCodecType codec_type, AjmI } const u32 index = dev->free_instances[dev->curr_cursor++]; dev->curr_cursor %= MaxInstances; - dev->instances[index].codec_type = codec_type; - *instance = index; + auto instance = std::make_unique(); + instance->index = index; + instance->codec_type = codec_type; + instance->num_channels = flags.channels; + dev->instances[index] = std::move(instance); + *out_instance = index; LOG_INFO(Lib_Ajm, "called codec_type = {}, flags = {:#x}, instance = {}", magic_enum::enum_name(codec_type), flags.raw, index); @@ -315,6 +358,7 @@ int PS4_SYSV_ABI sceAjmInstanceDestroy(u32 context, u32 instance) { dev->free_instances[dev->release_cursor] = instance; dev->release_cursor = next_slot; } + dev->instances[instance].reset(); return ORBIS_OK; } diff --git a/src/core/libraries/ajm/ajm.h b/src/core/libraries/ajm/ajm.h index a78dc0956..c6ab3958c 100644 --- a/src/core/libraries/ajm/ajm.h +++ b/src/core/libraries/ajm/ajm.h @@ -5,7 +5,6 @@ #include "common/types.h" #include "common/enum.h" -#include "core/libraries/ajm/ajm_mp3.h" namespace Core::Loader { class SymbolsResolver; @@ -34,7 +33,7 @@ struct AjmBuffer { struct AjmJobBuffer { u32 props; u32 buf_size; - const u8* buffer; + u8* buffer; }; struct AjmInOutJob { @@ -55,6 +54,7 @@ enum class AjmJobRunFlags : u32 { GetCodecInfo = 1 << 0, MultipleFrames = 1 << 1, }; +DECLARE_ENUM_FLAG_OPERATORS(AjmJobRunFlags) enum class AjmJobSidebandFlags : u32 { GaplessDecode = 1 << 0, @@ -106,14 +106,6 @@ struct AjmMultiJob { }; }; -enum class AjmCodecType : u32 { - Mp3Dec = 0, - At9Dec = 1, - M4aacDec = 2, - Max = 23, -}; -static constexpr u32 NumAjmCodecs = u32(AjmCodecType::Max); - union AjmFlags { u64 raw; struct { @@ -136,10 +128,13 @@ union AjmInstanceFlags { }; }; +struct AjmDecMp3ParseFrame; +enum class AjmCodecType : u32; + int PS4_SYSV_ABI sceAjmBatchCancel(); int PS4_SYSV_ABI sceAjmBatchErrorDump(); void* PS4_SYSV_ABI sceAjmBatchJobControlBufferRa(AjmSingleJob* batch_pos, u32 instance, AjmFlags flags, - const u8* in_buffer, u32 in_size, u8* out_buffer, + u8* in_buffer, u32 in_size, u8* out_buffer, u32 out_size, const void* ret_addr); int PS4_SYSV_ABI sceAjmBatchJobInlineBuffer(); int PS4_SYSV_ABI sceAjmBatchJobRunBufferRa(); diff --git a/src/core/libraries/ajm/ajm_instance.h b/src/core/libraries/ajm/ajm_instance.h new file mode 100644 index 000000000..12e90dd6c --- /dev/null +++ b/src/core/libraries/ajm/ajm_instance.h @@ -0,0 +1,70 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +extern "C" { +struct AVCodec; +struct AVCodecContext; +struct AVCodecParserContext; +} + +namespace Libraries::Ajm { + +enum class AjmCodecType : u32 { + Mp3Dec = 0, + At9Dec = 1, + M4aacDec = 2, + Max = 23, +}; +static constexpr u32 NumAjmCodecs = u32(AjmCodecType::Max); + +enum class AjmFormatEncoding : u32 { + S16 = 0, + S32 = 1, + Float = 2, +}; + +struct AjmSidebandResult { + s32 result; + s32 internal_result; +}; + +struct AjmSidebandMFrame { + u32 num_frames; + u32 reserved; +}; + +struct AjmSidebandStream { + s32 input_consumed; + s32 output_written; + u64 total_decoded_samples; +}; + +struct AjmSidebandFormat { + u32 num_channels; + u32 channel_mask; + u32 sampl_freq; + AjmFormatEncoding sample_encoding; + u32 bitrate; + u32 reserved; +}; + +struct AjmInstance { + AjmCodecType codec_type; + u32 decoded_samples{}; + AjmFormatEncoding fmt{}; + u32 num_channels{}; + + explicit AjmInstance() = default; + virtual ~AjmInstance() = default; + + virtual void Reset() = 0; + + virtual std::tuple Decode(const u8* in_buf, u32 in_size, + u8* out_buf, u32 out_size) = 0; +}; + +} // namespace Libraries::Ajm diff --git a/src/core/libraries/ajm/ajm_mp3.cpp b/src/core/libraries/ajm/ajm_mp3.cpp index 06305ee84..6dd41de69 100644 --- a/src/core/libraries/ajm/ajm_mp3.cpp +++ b/src/core/libraries/ajm/ajm_mp3.cpp @@ -1,8 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later + #pragma clang optimize off #include "common/assert.h" -#include "core/libraries/ajm/ajm_error.h" #include "core/libraries/ajm/ajm_mp3.h" extern "C" { @@ -25,8 +25,108 @@ static constexpr std::array, 2> BitrateTable = {{ static constexpr std::array UnkTable = {0x48, 0x90}; -int ParseMp3Header(const u8* buf, u32 stream_size, int parse_ofl, - AjmDecMp3ParseFrame* frame) { +SwrContext* swr_context{}; + +AVFrame* ConvertAudioFrame(AVFrame* frame) { + auto pcm16_frame = av_frame_clone(frame); + pcm16_frame->format = AV_SAMPLE_FMT_S16; + + if (swr_context) { + swr_free(&swr_context); + swr_context = nullptr; + } + AVChannelLayout in_ch_layout = frame->ch_layout; + AVChannelLayout out_ch_layout = pcm16_frame->ch_layout; + swr_alloc_set_opts2(&swr_context, &out_ch_layout, AV_SAMPLE_FMT_S16, frame->sample_rate, + &in_ch_layout, AVSampleFormat(frame->format), frame->sample_rate, 0, + nullptr); + swr_init(swr_context); + const auto res = swr_convert_frame(swr_context, pcm16_frame, frame); + if (res < 0) { + LOG_ERROR(Lib_AvPlayer, "Could not convert to S16: {}", av_err2str(res)); + return nullptr; + } + av_frame_free(&frame); + return pcm16_frame; +} + +AjmMp3Decoder::AjmMp3Decoder() { + codec = avcodec_find_decoder(AV_CODEC_ID_MP3); + ASSERT_MSG(codec, "MP3 codec not found"); + parser = av_parser_init(codec->id); + ASSERT_MSG(parser, "Parser not found"); + AjmMp3Decoder::Reset(); +} + +AjmMp3Decoder::~AjmMp3Decoder() { + avcodec_free_context(&c); + av_free(c); +} + +void AjmMp3Decoder::Reset() { + if (c) { + avcodec_free_context(&c); + av_free(c); + } + c = avcodec_alloc_context3(codec); + ASSERT_MSG(c, "Could not allocate audio codec context"); + int ret = avcodec_open2(c, codec, nullptr); + ASSERT_MSG(ret >= 0, "Could not open codec"); + decoded_samples = 0; + static int filename = 0; + file.close(); + file.open(fmt::format("inst{}_{}.raw", index, ++filename), std::ios::out | std::ios::binary); +} + +std::tuple AjmMp3Decoder::Decode(const u8* buf, u32 in_size, + u8* out_buf, u32 out_size) { + u32 num_frames = 0; + AVPacket* pkt = av_packet_alloc(); + while (in_size > 0 && out_size > 0) { + int ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size, + buf, in_size, + AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0); + ASSERT_MSG(ret >= 0, "Error while parsing {}", ret); + buf += ret; + in_size -= ret; + + if (pkt->size) { + // Send the packet with the compressed data to the decoder + pkt->pts = parser->pts; + pkt->dts = parser->dts; + pkt->flags = (parser->key_frame == 1) ? AV_PKT_FLAG_KEY : 0; + ret = avcodec_send_packet(c, pkt); + ASSERT_MSG(ret >= 0, "Error submitting the packet to the decoder {}", ret); + + // Read all the output frames (in general there may be any number of them + while (ret >= 0) { + AVFrame* frame = av_frame_alloc(); + ret = avcodec_receive_frame(c, frame); + if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { + break; + } else if (ret < 0) { + UNREACHABLE_MSG("Error during decoding"); + } + if (frame->format != AV_SAMPLE_FMT_S16) { + frame = ConvertAudioFrame(frame); + } + const auto size = frame->ch_layout.nb_channels * frame->nb_samples * sizeof(u16); + std::memcpy(out_buf, frame->data[0], size); + file.write((const char*)frame->data[0], size); + out_buf += size; + out_size -= size; + decoded_samples += frame->nb_samples; + num_frames++; + av_frame_free(&frame); + } + } + } + av_packet_free(&pkt); + return std::make_tuple(in_size, out_size, num_frames); +} + +int AjmMp3Decoder::ParseMp3Header(const u8* buf, u32 stream_size, int parse_ofl, + AjmDecMp3ParseFrame* frame) { const u32 unk_idx = buf[1] >> 3 & 1; const s32 version_idx = (buf[1] >> 3 & 3) ^ 2; const s32 sr_idx = buf[2] >> 2 & 3; @@ -46,56 +146,4 @@ int ParseMp3Header(const u8* buf, u32 stream_size, int parse_ofl, return 0; } -MP3Decoder::MP3Decoder() { - codec = avcodec_find_decoder(AV_CODEC_ID_MP3); - ASSERT_MSG(codec, "MP3 codec not found"); - parser = av_parser_init(codec->id); - ASSERT_MSG(parser, "Parser not found"); - c = avcodec_alloc_context3(codec); - ASSERT_MSG(c, "Could not allocate audio codec context"); - int ret = avcodec_open2(c, codec, nullptr); - ASSERT_MSG(ret >= 0, "Could not open codec"); -} - -MP3Decoder::~MP3Decoder() { - avcodec_free_context(&c); - av_free(c); -} - -void MP3Decoder::Decode(const u8* buf, u32 buf_size) { - AVPacket* pkt = av_packet_alloc(); - AVFrame* frame = av_frame_alloc(); - while (buf_size > 0) { - int ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size, - buf, buf_size, - AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0); - ASSERT_MSG(ret >= 0, "Error while parsing {}", ret); - buf += ret; - buf_size -= ret; - - if (pkt->size) { - // Send the packet with the compressed data to the decoder - pkt->pts = parser->pts; - pkt->dts = parser->dts; - pkt->flags = (parser->key_frame == 1) ? AV_PKT_FLAG_KEY : 0; - ret = avcodec_send_packet(c, pkt); - ASSERT_MSG(ret >= 0, "Error submitting the packet to the decoder {}", ret); - - // Read all the output frames (in general there may be any number of them - while (ret >= 0) { - LOG_INFO(Lib_Ajm, "Receive MP3 frame"); - ret = avcodec_receive_frame(c, frame); - if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { - break; - } else if (ret < 0) { - UNREACHABLE_MSG("Error during decoding"); - } - const s32 bps = av_get_bytes_per_sample(c->sample_fmt); - } - } - } - av_frame_free(&frame); - av_packet_free(&pkt); -} - } // namespace Libraries::Ajm diff --git a/src/core/libraries/ajm/ajm_mp3.h b/src/core/libraries/ajm/ajm_mp3.h index abd78f56d..b7a36d5c2 100644 --- a/src/core/libraries/ajm/ajm_mp3.h +++ b/src/core/libraries/ajm/ajm_mp3.h @@ -3,7 +3,9 @@ #pragma once +#include #include "common/types.h" +#include "core/libraries/ajm/ajm_instance.h" extern "C" { struct AVCodec; @@ -55,28 +57,28 @@ struct AjmSidebandDecMp3CodecInfo { u16 reserved[3]; }; -struct AjmSidebandResult { - s32 result; - s32 internal_result; -}; - struct AjmDecMp3GetCodecInfoResult { AjmSidebandResult result; AjmSidebandDecMp3CodecInfo codec_info; }; -struct MP3Decoder { +struct AjmMp3Decoder : public AjmInstance { const AVCodec* codec = nullptr; AVCodecContext* c = nullptr; AVCodecParserContext* parser = nullptr; + u32 index; + std::ofstream file; - explicit MP3Decoder(); - ~MP3Decoder(); + explicit AjmMp3Decoder(); + ~AjmMp3Decoder() override; - void Decode(const u8* in_buf, u32 frame_size); + void Reset() override; + + std::tuple Decode(const u8* in_buf, u32 in_size, + u8* out_buf, u32 out_size) override; + + static int ParseMp3Header(const u8* buf, u32 stream_size, int parse_ofl, + AjmDecMp3ParseFrame* frame); }; -int ParseMp3Header(const u8* buf, u32 stream_size, int parse_ofl, - AjmDecMp3ParseFrame* frame); - } // namespace Libraries::Ajm diff --git a/src/core/libraries/avplayer/avplayer.cpp b/src/core/libraries/avplayer/avplayer.cpp index 23e1e987a..d9006edc9 100644 --- a/src/core/libraries/avplayer/avplayer.cpp +++ b/src/core/libraries/avplayer/avplayer.cpp @@ -309,7 +309,7 @@ void RegisterlibSceAvPlayer(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("XC9wM+xULz8", "libSceAvPlayer", 1, "libSceAvPlayer", 1, 0, sceAvPlayerJumpToTime); LIB_FUNCTION("9y5v+fGN4Wk", "libSceAvPlayer", 1, "libSceAvPlayer", 1, 0, sceAvPlayerPause); LIB_FUNCTION("HD1YKVU26-M", "libSceAvPlayer", 1, "libSceAvPlayer", 1, 0, sceAvPlayerPostInit); - LIB_FUNCTION("agig-iDRrTE", "libSceAvPlayer", 1, "libSceAvPlayer", 1, 0, sceAvPlayerPrintf); + //LIB_FUNCTION("agig-iDRrTE", "libSceAvPlayer", 1, "libSceAvPlayer", 1, 0, sceAvPlayerPrintf); LIB_FUNCTION("w5moABNwnRY", "libSceAvPlayer", 1, "libSceAvPlayer", 1, 0, sceAvPlayerResume); LIB_FUNCTION("k-q+xOxdc3E", "libSceAvPlayer", 1, "libSceAvPlayer", 1, 0, sceAvPlayerSetAvSyncMode); diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 645bcf423..5fb075386 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -1112,9 +1112,31 @@ int PS4_SYSV_ABI sceGnmInsertSetColorMarker() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmInsertSetMarker() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +int PS4_SYSV_ABI sceGnmInsertSetMarker(u32 *param_1,int param_2,char *param_3) { + int iVar1; + int iVar2; + u32 uVar3; + size_t sVar4; + u32 uVar5; + u64 uVar6; + + uVar6 = 0xffffffff; + if ((param_1 != (u32 *)0x0) && (param_3 != (char *)0x0)) { + sVar4 = strlen(param_3); + iVar2 = (int)sVar4; + uVar3 = (iVar2 + 0xcU) >> 3; + uVar5 = (iVar2 + 8U) >> 2; + if (uVar5 + 2 + uVar3 * 2 == param_2) { + iVar1 = uVar5 + uVar3 * 2; + uVar3 = iVar2 + 1; + *param_1 = iVar1 * 0x10000 | 0xc0001000; + param_1[1] = 0x68750003; + memcpy(param_1 + 2,param_3,(ulong)uVar3); + uVar6 = 0; + memset((void *)((long)(param_1 + 2) + (ulong)uVar3),0,(ulong)(iVar1 * 4 - uVar3)); + } + } + return uVar6; } int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker() { diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 754d488f8..3089b1d17 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -108,7 +108,7 @@ s32 PS4_SYSV_ABI sceGnmInsertPopMarker(u32* cmdbuf, u32 size); int PS4_SYSV_ABI sceGnmInsertPushColorMarker(); s32 PS4_SYSV_ABI sceGnmInsertPushMarker(u32* cmdbuf, u32 size, const char* marker); int PS4_SYSV_ABI sceGnmInsertSetColorMarker(); -int PS4_SYSV_ABI sceGnmInsertSetMarker(); +int PS4_SYSV_ABI sceGnmInsertSetMarker(u32 *param_1,int param_2,char *param_3); int PS4_SYSV_ABI sceGnmInsertThreadTraceMarker(); s32 PS4_SYSV_ABI sceGnmInsertWaitFlipDone(u32* cmdbuf, u32 size, s32 vo_handle, u32 buf_idx); int PS4_SYSV_ABI sceGnmIsCoredumpValid(); diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index d56f4dc41..3a3a63125 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -481,6 +481,7 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { sceLibcHeapGetTraceInfo); LIB_FUNCTION("FxVZqBAA7ks", "libkernel", 1, "libkernel", 1, 1, ps4__write); LIB_FUNCTION("6XG4B33N09g", "libScePosix", 1, "libkernel", 1, 1, sched_yield); + LIB_FUNCTION("6XG4B33N09g", "libkernel", 1, "libkernel", 1, 1, sched_yield); } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index ec8c15afa..d09f47b5e 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -1041,7 +1041,7 @@ int PS4_SYSV_ABI scePthreadCreate(ScePthread* thread, const ScePthreadAttr* attr pthread_attr_setstacksize(&(*attr)->pth_attr, 2_MB); result = pthread_create(&(*thread)->pth, &(*attr)->pth_attr, run_thread, *thread); - LOG_INFO(Kernel_Pthread, "thread create name = {}", (*thread)->name); + //LOG_INFO(Kernel_Pthread, "thread create name = {}", (*thread)->name); switch (result) { case 0: @@ -1069,7 +1069,16 @@ ScePthread PThreadPool::Create(const char* name) { } } +#ifdef _WIN64 auto* ret = new PthreadInternal{}; +#else + // TODO: Linux specific hack + static u8* hint_address = reinterpret_cast(0x7FFFFC000ULL); + auto* ret = reinterpret_cast( + mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)); + hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB); +#endif ret->is_free = false; ret->is_detached = false; ret->is_almost_done = false; diff --git a/src/core/libraries/libs.h b/src/core/libraries/libs.h index ea928101e..e6502876f 100644 --- a/src/core/libraries/libs.h +++ b/src/core/libraries/libs.h @@ -25,12 +25,13 @@ template { static R PS4_SYSV_ABI wrap(Args... args) { if (std::string_view(name.value) != "scePthreadEqual" && - std::string_view(name.value) != "sceUserServiceGetEvent") { - // LOG_WARNING(Core_Linker, "Function {} called", name.value); + std::string_view(name.value) != "sceUserServiceGetEvent" && + !std::string_view(name.value).contains("scePthreadMutex")) { + //LOG_WARNING(Core_Linker, "Function {} called", name.value); } if constexpr (std::is_same_v || std::is_same_v) { const u32 ret = f(args...); - if (ret != 0 && std::string_view(name.value) != "scePthreadEqual") { + if (ret != 0 && std::string_view(name.value) != "scePthreadEqual" && !std::string_view(name.value).contains("Cond")) { LOG_WARNING(Core_Linker, "Function {} returned {:#x}", name.value, ret); } return ret; @@ -43,7 +44,7 @@ struct wrapper_impl { template constexpr auto wrapper = wrapper_impl::wrap; -// #define W(foo) wrapper<#foo, decltype(&foo), foo> +//#define W(foo) wrapper<#foo, decltype(&foo), foo> #define W(foo) foo #define LIB_FUNCTION(nid, lib, libversion, mod, moduleVersionMajor, moduleVersionMinor, function) \ diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index 27fe773b6..aa5d9a986 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -297,7 +297,6 @@ void VideoOutDriver::PresentThread(std::stop_token token) { { // Needs lock here as can be concurrently read by `sceVideoOutGetVblankStatus` - std::unique_lock lock{main_port.vo_mutex}; vblank_status.count++; vblank_status.processTime = Libraries::Kernel::sceKernelGetProcessTime(); vblank_status.tsc = Libraries::Kernel::sceKernelReadTsc(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 7df62a910..5fed9b4db 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -305,7 +305,7 @@ void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id); const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset); if (buffer.is_integer) { - value = ctx.OpBitcast(ctx.S32[4], value); + value = ctx.OpBitcast(buffer.result_type, value); } ctx.OpImageWrite(tex_buffer, coord, value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 530f381d7..26c5c72d6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -157,8 +157,10 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const ImageOperands operands; operands.AddOffset(ctx, offset); operands.Add(spv::ImageOperandsMask::Lod, lod); - return ctx.OpBitcast( - ctx.F32[4], ctx.OpImageFetch(result_type, image, coords, operands.mask, operands.operands)); + const Id texel = texture.is_storage ? ctx.OpImageRead(result_type, image, coords) + : ctx.OpImageFetch(result_type, image, coords, + operands.mask, operands.operands); + return ctx.OpBitcast(ctx.F32[4], texel); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 8554f8615..3838d614e 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -365,7 +365,7 @@ void EmitContext::DefineBuffers() { const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32; const Id data_type = (*data_types)[1]; const Id record_array_type{is_storage ? TypeRuntimeArray(data_type) - : TypeArray(data_type, ConstU32(sharp.NumDwords()))}; + : TypeArray(data_type, ConstU32(16384U))}; const Id struct_type{define_struct(record_array_type, desc.is_instance_data)}; const auto storage_class = @@ -501,6 +501,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, sampled, format); case AmdGpu::ImageType::Color2DArray: return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, true, false, sampled, format); + case AmdGpu::ImageType::Color2DMsaa: + return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format); case AmdGpu::ImageType::Color3D: return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format); case AmdGpu::ImageType::Cube: @@ -528,6 +530,7 @@ void EmitContext::DefineImagesAndSamplers() { .sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type), .pointer_type = pointer_type, .image_type = image_type, + .is_storage = image_desc.is_storage, }); interfaces.push_back(id); ++binding; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 0908b7f82..06faeb13a 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -200,6 +200,7 @@ public: Id sampled_type; Id pointer_type; Id image_type; + bool is_storage; }; struct BufferDefinition { diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 276bd9db0..03017762c 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -23,7 +23,6 @@ struct Compare { static IR::Condition MakeCondition(const GcnInst& inst) { if (inst.IsCmpx()) { - ASSERT(inst.opcode == Opcode::V_CMPX_NE_U32); return IR::Condition::Execnz; } @@ -99,7 +98,7 @@ void CFG::EmitDivergenceLabels() { // with SAVEEXEC to mask the threads that didn't pass the condition // of initial branch. (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo) || - inst.opcode == Opcode::V_CMPX_NE_U32; + inst.IsCmpx(); }; const auto is_close_scope = [](const GcnInst& inst) { // Closing an EXEC scope can be either a branch instruction @@ -110,6 +109,7 @@ void CFG::EmitDivergenceLabels() { // Those instructions need to be wrapped in the condition as well so allow branch // as end scope instruction. inst.opcode == Opcode::S_CBRANCH_EXECZ || + inst.opcode == Opcode::S_ENDPGM || (inst.opcode == Opcode::S_ANDN2_B64 && inst.dst[0].field == OperandField::ExecLo); }; @@ -127,7 +127,8 @@ void CFG::EmitDivergenceLabels() { s32 curr_begin = -1; for (size_t index = GetIndex(start); index < end_index; index++) { const auto& inst = inst_list[index]; - if (is_close_scope(inst) && curr_begin != -1) { + const bool is_close = is_close_scope(inst); + if (is_close && curr_begin != -1) { // If there are no instructions inside scope don't do anything. if (index - curr_begin == 1) { curr_begin = -1; @@ -138,8 +139,16 @@ void CFG::EmitDivergenceLabels() { const auto& save_inst = inst_list[curr_begin]; const Label label = index_to_pc[curr_begin] + save_inst.length; AddLabel(label); - // Add a label to the close scope instruction as well. - AddLabel(index_to_pc[index]); + // Add a label to the close scope instruction. + // There are 3 cases of when we need to close a scope. + // * Close scope instruction inside the block + // * Close scope instruction at end of the block (cbranch of endpgm) + // * Normal instruction at end of block + // For the last case we must NOT add a label as that would cause + // the last instruction to be separated into its own basic block + if (is_close) { + AddLabel(index_to_pc[index]); + } // Reset scope begin. curr_begin = -1; } @@ -194,14 +203,16 @@ void CFG::LinkBlocks() { const auto end_inst{block.end_inst}; // Handle divergence block inserted here. if (end_inst.opcode == Opcode::S_AND_SAVEEXEC_B64 || - end_inst.opcode == Opcode::S_ANDN2_B64 || end_inst.opcode == Opcode::V_CMPX_NE_U32) { + end_inst.opcode == Opcode::S_ANDN2_B64 || end_inst.IsCmpx()) { // Blocks are stored ordered by address in the set auto next_it = std::next(it); + ASSERT(next_it != blocks.end()); auto* target_block = &(*next_it); ++target_block->num_predecessors; block.branch_true = target_block; auto merge_it = std::next(next_it); + ASSERT(merge_it != blocks.end()); auto* merge_block = &(*merge_it); ++merge_block->num_predecessors; block.branch_false = merge_block; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 888d3451b..4ca2f73af 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -132,7 +132,7 @@ public: void V_ADD_F32(const GcnInst& inst); void V_CVT_OFF_F32_I4(const GcnInst& inst); void V_MED3_F32(const GcnInst& inst); - void V_MED3_I32(const GcnInst& inst); + void V_MED3_I32(bool is_signed, const GcnInst& inst); void V_FLOOR_F32(const GcnInst& inst); void V_SUB_F32(const GcnInst& inst); void V_RCP_F32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index b4470ee39..6f8b72cf2 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -188,7 +188,9 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { case Opcode::V_MED3_F32: return V_MED3_F32(inst); case Opcode::V_MED3_I32: - return V_MED3_I32(inst); + return V_MED3_I32(true, inst); + case Opcode::V_MED3_U32: + return V_MED3_I32(false, inst); case Opcode::V_FLOOR_F32: return V_FLOOR_F32(inst); case Opcode::V_SUB_F32: @@ -307,6 +309,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_CMP_U32(ConditionOp::TRU, false, true, inst); case Opcode::V_CMPX_LG_I32: return V_CMP_U32(ConditionOp::LG, true, true, inst); + case Opcode::V_CMPX_EQ_I32: + return V_CMP_U32(ConditionOp::EQ, true, true, inst); + case Opcode::V_CMPX_LE_I32: + return V_CMP_U32(ConditionOp::LE, true, true, inst); case Opcode::V_MBCNT_LO_U32_B32: return V_MBCNT_U32_B32(true, inst); @@ -503,12 +509,12 @@ void Translator::V_MED3_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMax(ir.FPMin(src0, src1), mmx)); } -void Translator::V_MED3_I32(const GcnInst& inst) { +void Translator::V_MED3_I32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 src2{GetSrc(inst.src[2])}; - const IR::U32 mmx = ir.SMin(ir.SMax(src0, src1), src2); - SetDst(inst.dst[0], ir.SMax(ir.SMin(src0, src1), mmx)); + const IR::U32 mmx = ir.IMin(ir.IMax(src0, src1, is_signed), src2, is_signed); + SetDst(inst.dst[0], ir.IMax(ir.IMin(src0, src1, is_signed), mmx, is_signed)); } void Translator::V_FLOOR_F32(const GcnInst& inst) { @@ -958,6 +964,8 @@ void Translator::V_CMP_CLASS_F32(const GcnInst& inst) { switch (inst.dst[1].field) { case OperandField::VccLo: return ir.SetVcc(value); + case OperandField::ScalarGPR: + return ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), value); default: UNREACHABLE(); } diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index f602e762e..f5e2f551b 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -48,6 +48,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::IMAGE_GET_LOD: return IMAGE_GET_LOD(inst); case Opcode::IMAGE_STORE: + case Opcode::IMAGE_STORE_MIP: return IMAGE_STORE(inst); case Opcode::IMAGE_LOAD_MIP: return IMAGE_LOAD(true, inst); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 6b2aa8bbf..19a9cca58 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -200,9 +200,10 @@ public: u32 Add(const ImageResource& desc) { const u32 index{Add(image_resources, desc, [&desc](const auto& existing) { return desc.sgpr_base == existing.sgpr_base && - desc.dword_offset == existing.dword_offset && desc.type == existing.type && - desc.is_storage == existing.is_storage; + desc.dword_offset == existing.dword_offset; })}; + auto& image = image_resources[index]; + image.is_storage |= desc.is_storage; return index; } diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index 54dce0355..df73c1bc8 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -80,10 +80,10 @@ struct DefTable { } const IR::Value& Def(IR::Block* block, ThreadBitScalar variable) { - return block->ssa_sreg_values[RegIndex(variable.sgpr)]; + return block->ssa_sbit_values[RegIndex(variable.sgpr)]; } void SetDef(IR::Block* block, ThreadBitScalar variable, const IR::Value& value) { - block->ssa_sreg_values[RegIndex(variable.sgpr)] = value; + block->ssa_sbit_values[RegIndex(variable.sgpr)] = value; } const IR::Value& Def(IR::Block* block, SccFlagTag) { diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index a2bd60f2e..2c04b2f4b 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -46,6 +46,14 @@ Liverpool::~Liverpool() { void Liverpool::Process(std::stop_token stoken) { Common::SetCurrentThreadName("GPU_CommandProcessor"); + for (int i = 0; i < NumTotalQueues; i++) { + GpuQueue& queue = mapped_queues[i]; + std::scoped_lock lk(queue.m_access); + + queue.ccb_buffer.reserve(1024 * 1024); + queue.dcb_buffer.reserve(1024 * 1024); + } + while (!stoken.stop_requested()) { { std::unique_lock lk{submit_mutex}; @@ -97,8 +105,8 @@ void Liverpool::Process(std::stop_token stoken) { std::scoped_lock lock{queue.m_access}; queue.submits.pop(); - --num_submits; std::scoped_lock lock2{submit_mutex}; + --num_submits; submit_cv.notify_all(); } } @@ -462,6 +470,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanAddress(); break; } + case PM4ItOpcode::SetPredication: case PM4ItOpcode::EventWrite: { // const auto* event = reinterpret_cast(header); break; @@ -603,6 +612,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { (count - 1) * sizeof(u32)); break; } + case PM4ItOpcode::DmaData: { + break; + } case PM4ItOpcode::DispatchDirect: { const auto* dispatch_direct = reinterpret_cast(header); regs.cs_program.dim_x = dispatch_direct->dim_x; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 2ed0ddc87..3b92a8e1a 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -581,6 +581,9 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 offset = buffer.Offset(image.cpu_addr); const u32 num_layers = image.info.resources.layers; for (u32 m = 0; m < image.info.resources.levels; m++) { + if (offset >= buffer.SizeBytes()) { + break; + } const u32 width = std::max(image.info.size.width >> m, 1u); const u32 height = std::max(image.info.size.height >> m, 1u); const u32 depth = diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index d62077b04..1c2b28d5b 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -8,7 +8,7 @@ #include "common/error.h" #include "video_core/page_manager.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" - +#define ENABLE_USERFAULTFD 1 #ifndef _WIN64 #include #include diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index a97c3dee9..b61bc4706 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -203,6 +203,8 @@ vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode) { return vk::SamplerAddressMode::eMirrorClampToEdge; case AmdGpu::ClampMode::ClampBorder: return vk::SamplerAddressMode::eClampToBorder; + case AmdGpu::ClampMode::ClampHalfBorder: + return vk::SamplerAddressMode::eClampToBorder; default: UNREACHABLE(); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 96358bf67..5c6d8a583 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -166,8 +166,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, const auto vsharp = desc.GetSharp(*info); vk::BufferView& buffer_view = buffer_views.emplace_back(VK_NULL_HANDLE); const u32 size = vsharp.GetSize(); - if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && size != 0) { - const VAddr address = vsharp.base_address; + const VAddr address = vsharp.base_address; + if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && address != 0 && size != 0) { if (desc.is_written) { if (texture_cache.TouchMeta(address, true)) { LOG_TRACE(Render_Vulkan, "Metadata update skipped"); @@ -182,8 +182,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(address, size, desc.is_written, true); const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; - ASSERT_MSG(fmt_stride == vsharp.GetStride(), - "Texel buffer stride must match format stride"); + //ASSERT_MSG(fmt_stride == vsharp.GetStride(), + // "Texel buffer stride must match format stride"); const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 adjust = offset - offset_aligned; if (adjust != 0) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2f5209eb2..5c674c2fa 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -452,7 +452,7 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, boost::container::static_vector tsharps; for (const auto& image_desc : stage->images) { const auto tsharp = image_desc.GetSharp(*stage); - if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { + if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && tsharp.data_format != 15) { tsharps.emplace_back(tsharp); VideoCore::ImageInfo image_info{tsharp, image_desc.is_depth}; VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 769a808e1..035830d3e 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -336,6 +336,8 @@ bool Instance::CreateDevice() { .workgroupMemoryExplicitLayout16BitAccess = true, }, vk::PhysicalDeviceRobustness2FeaturesEXT{ + .robustBufferAccess2 = true, + .robustImageAccess2 = true, .nullDescriptor = true, }, vk::PhysicalDeviceSynchronization2Features{ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e19467b00..0a5bb081a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -184,7 +184,15 @@ const ComputePipeline* PipelineCache::GetComputePipeline() { } bool ShouldSkipShader(u64 shader_hash, const char* shader_type) { - static constexpr std::array skip_hashes = {}; + static constexpr std::array skip_hashes = { + 0x35e6d1c958998c1eULL, + // debugbreak called if not skipped + 0xdd2e7072, 0xc5f6bede, + // Unhandled exception at 0x0000000900BD6A88 in shadps4.exe: 0xC0000005: Access violation + // reading location 0xFFFFFFFFFFFFFFFF + //(int 0x41 in game disassembly) if not skipped + 0x1651ce59, 0x9f991c28 // causes above error, disables splash screen. + }; if (std::ranges::contains(skip_hashes, shader_hash)) { LOG_WARNING(Render_Vulkan, "Skipped {} shader hash {:#x}.", shader_type, shader_hash); return true; diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index d494322a9..9e46bf7bd 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -118,6 +118,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address}, cpu_addr_end{cpu_addr + info.guest_size_bytes} { mip_hashes.resize(info.resources.levels); + if (info.size.height == 1620) { + printf("bad\n"); + } ASSERT(info.pixel_format != vk::Format::eUndefined); // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case // the texture cache should re-create the resource with the usage requested diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index f94c1a37b..770404bef 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -149,7 +149,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info .baseMipLevel = info.range.base.level, .levelCount = info.range.extent.levels - info.range.base.level, .baseArrayLayer = info.range.base.layer, - .layerCount = info.range.extent.layers - info.range.base.layer, + .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; image_view = instance.GetDevice().createImageViewUnique(image_view_ci); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index c4548a790..3219f45b9 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -29,7 +29,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& info.UpdateSize(); const ImageId null_id = slot_images.insert(instance, scheduler, info); ASSERT(null_id.index == 0); - slot_images[null_id].flags = ImageFlagBits{}; + slot_images[null_id].flags = ImageFlagBits::Tracked; ImageViewInfo view_info; void(slot_image_views.insert(instance, view_info, slot_images[null_id], null_id)); diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 7e06291e7..bdbe0a0b9 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -186,10 +186,12 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eB8G8R8A8Unorm: case vk::Format::eR8G8B8A8Unorm: case vk::Format::eR8G8B8A8Uint: + case vk::Format::eR8G8B8A8Snorm: case vk::Format::eR32Sfloat: case vk::Format::eR32Uint: case vk::Format::eR16G16Sfloat: case vk::Format::eR16G16Unorm: + case vk::Format::eR16G16Snorm: case vk::Format::eB10G11R11UfloatPack32: return vk::Format::eR32Uint; case vk::Format::eBc1RgbaSrgbBlock: