diff --git a/CMakeLists.txt b/CMakeLists.txt index 781e93e10..297c412ae 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -616,6 +616,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/buffer_cache/word_manager.h src/video_core/renderer_vulkan/liverpool_to_vk.cpp src/video_core/renderer_vulkan/liverpool_to_vk.h + src/video_core/renderer_vulkan/number_utils.cpp + src/video_core/renderer_vulkan/number_utils.h src/video_core/renderer_vulkan/renderer_vulkan.cpp src/video_core/renderer_vulkan/renderer_vulkan.h src/video_core/renderer_vulkan/vk_common.cpp @@ -771,7 +773,7 @@ endif() create_target_directory_groups(shadps4) -target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn) +target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half) target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3 pugixml::pugixml) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") @@ -794,9 +796,6 @@ if (APPLE) # Replacement for std::chrono::time_zone target_link_libraries(shadps4 PRIVATE date::date-tz) - - # Half float conversions for F16C patches - target_link_libraries(shadps4 PRIVATE half) endif() if (NOT ENABLE_QT_GUI) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 2f9336c21..9cae34381 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -141,11 +141,11 @@ if (WIN32) target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument") endif() -if (APPLE) - # half - add_library(half INTERFACE) - target_include_directories(half INTERFACE half/include) +# half +add_library(half INTERFACE) +target_include_directories(half INTERFACE half/include) +if (APPLE) # date if (NOT TARGET date::date-tz) option(BUILD_TZ_LIB "" ON) diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index a68ec1e74..55990c0ee 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -4,9 +4,12 @@ #include "common/assert.h" #include "video_core/amdgpu/pixel_format.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" +#include "video_core/renderer_vulkan/number_utils.h" #include +#define INVALID_NUMBER_FORMAT_COMBO LOG_ERROR(Render_Vulkan, "Unsupported number type {} for format {}", number_type, format); + namespace Vulkan::LiverpoolToVK { using DepthBuffer = Liverpool::DepthBuffer; @@ -725,55 +728,287 @@ void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) { } } -static constexpr float U8ToUnorm(u8 v) { - static constexpr auto c = 1.0f / 255.0f; - return float(v * c); -} - vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { const auto comp_swap = color_buffer.info.comp_swap.Value(); - ASSERT_MSG(comp_swap == Liverpool::ColorBuffer::SwapMode::Standard || - comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate, - "Unsupported component swap mode {}", static_cast(comp_swap)); - - const bool comp_swap_alt = comp_swap == Liverpool::ColorBuffer::SwapMode::Alternate; + const auto format = color_buffer.info.format.Value(); + const auto number_type = color_buffer.info.number_type.Value(); const auto& c0 = color_buffer.clear_word0; const auto& c1 = color_buffer.clear_word1; const auto num_bits = AmdGpu::NumBits(color_buffer.info.format); vk::ClearColorValue color{}; - switch (color_buffer.info.number_type) { - case AmdGpu::NumberFormat::Snorm: - [[fallthrough]]; - case AmdGpu::NumberFormat::SnormNz: - [[fallthrough]]; - case AmdGpu::NumberFormat::Unorm: - [[fallthrough]]; - case AmdGpu::NumberFormat::Srgb: { - switch (num_bits) { - case 32: { - color.float32 = std::array{ - U8ToUnorm((c0 >> (comp_swap_alt ? 16 : 0)) & 0xff), - U8ToUnorm((c0 >> 8) & 0xff), - U8ToUnorm((c0 >> (comp_swap_alt ? 0 : 16)) & 0xff), - U8ToUnorm((c0 >> 24) & 0xff), - }; + + switch (format) { + case AmdGpu::DataFormat::Format8: + switch (number_type) { + case AmdGpu::NumberFormat::Unorm: + case AmdGpu::NumberFormat::Srgb: // Should we handle gamma correction here? + color.float32[0] = NumberUtils::U8ToUnorm(c0 & 0xff); + break; + break; + case AmdGpu::NumberFormat::Snorm: + color.float32[0] = NumberUtils::S8ToSnorm(c0 & 0xff); + break; + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + color.uint32[0] = c0; + break; + default: + INVALID_NUMBER_FORMAT_COMBO; break; } - default: { - LOG_ERROR(Render_Vulkan, "Missing clear color conversion for bits {}", num_bits); + break; + case AmdGpu::DataFormat::Format16: + switch (number_type) { + case AmdGpu::NumberFormat::Unorm: + color.float32[0] = NumberUtils::U16ToUnorm(c0 & 0xffff); + break; + case AmdGpu::NumberFormat::Snorm: + color.float32[0] = NumberUtils::S16ToSnorm(c0 & 0xffff); + break; + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + color.uint32[0] = c0; + break; + case AmdGpu::NumberFormat::Float: + color.float32[0] = NumberUtils::Uf16ToF32(c0 & 0xffff); + break; + default: + INVALID_NUMBER_FORMAT_COMBO; break; } + break; + case AmdGpu::DataFormat::Format8_8: + switch (number_type) { + case AmdGpu::NumberFormat::Unorm: + case AmdGpu::NumberFormat::Srgb: // Should we handle gamma correction here? + color.float32[0] = NumberUtils::U8ToUnorm( c0 & 0xff); + color.float32[1] = NumberUtils::U8ToUnorm((c0 >> 8) & 0xff); + break; + case AmdGpu::NumberFormat::Snorm: + color.float32[0] = NumberUtils::S8ToSnorm( c0 & 0xff); + color.float32[1] = NumberUtils::S8ToSnorm((c0 >> 8) & 0xff); + break; + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + color.uint32[0] = c0 & 0xff; + color.uint32[1] = (c0 >> 8) & 0xff; + break; + default: + INVALID_NUMBER_FORMAT_COMBO; + break; + } + break; + case AmdGpu::DataFormat::Format32: + switch (number_type) { + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + color.uint32[0] = c0; + break; + case AmdGpu::NumberFormat::Float: + color.float32[0] = *(reinterpret_cast(&c0)); + break; + default: + INVALID_NUMBER_FORMAT_COMBO; + break; + } + break; + case AmdGpu::DataFormat::Format16_16: + switch (number_type) { + case AmdGpu::NumberFormat::Unorm: + color.float32[0] = NumberUtils::U16ToUnorm( c0 & 0xffff); + color.float32[1] = NumberUtils::U16ToUnorm((c0 >> 16) & 0xffff); + break; + case AmdGpu::NumberFormat::Snorm: + color.float32[0] = NumberUtils::S16ToSnorm( c0 & 0xffff); + color.float32[1] = NumberUtils::S16ToSnorm((c0 >> 16) & 0xffff); + break; + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + color.uint32[0] = c0 & 0xffff; + color.uint32[1] = (c0 >> 16) & 0xffff; + break; + case AmdGpu::NumberFormat::Float: + color.float32[0] = NumberUtils::Uf16ToF32( c0 & 0xffff); + color.float32[1] = NumberUtils::Uf16ToF32((c0 >> 16) & 0xffff); + break; + default: + INVALID_NUMBER_FORMAT_COMBO; + break; + } + break; + case AmdGpu::DataFormat::Format10_11_11: + color.float32[0] = NumberUtils::Uf11ToF32( c0 & 0x7ff); + color.float32[1] = NumberUtils::Uf11ToF32((c0 >> 11) & 0x7ff); + color.float32[2] = NumberUtils::Uf10ToF32((c0 >> 22) & 0x3ff); + break; + case AmdGpu::DataFormat::Format11_11_10: + color.float32[0] = NumberUtils::Uf10ToF32( c0 & 0x3ff); + color.float32[1] = NumberUtils::Uf11ToF32((c0 >> 10) & 0x7ff); + color.float32[2] = NumberUtils::Uf11ToF32((c0 >> 21) & 0x7ff); + break; + case AmdGpu::DataFormat::Format5_9_9_9: { + int exponent; + union { float f; u32 u; } scale; + + exponent = (c0 >> 27) - 10; + scale.u = (exponent + 127) << 23; + + color.float32[0] = ( c0 & 0x1ff) * scale.f; + color.float32[1] = ((c0 >> 9) & 0x1ff) * scale.f; + color.float32[2] = ((c0 >> 18) & 0x1ff) * scale.f; + break; + } + case AmdGpu::DataFormat::Format10_10_10_2: + switch (number_type) { + case AmdGpu::NumberFormat::Unorm: + color.float32[0] = NumberUtils::U2ToUnorm( c0 & 0x3); + color.float32[1] = NumberUtils::U10ToUnorm((c0 >> 2) & 0x3ff); + color.float32[2] = NumberUtils::U10ToUnorm((c0 >> 12) & 0x3ff); + color.float32[3] = NumberUtils::U10ToUnorm( c0 >> 22); + break; + case AmdGpu::NumberFormat::Snorm: + color.float32[0] = NumberUtils::S2ToSnorm( c0 & 0x3); + color.float32[1] = NumberUtils::S10ToSnorm((c0 >> 2) & 0x3ff); + color.float32[2] = NumberUtils::S10ToSnorm((c0 >> 12) & 0x3ff); + color.float32[3] = NumberUtils::S2ToSnorm( c0 >> 22); + break; + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + color.uint32[0] = c0 & 0x3; + color.uint32[1] = (c0 >> 2) & 0x3ff; + color.uint32[2] = (c0 >> 12) & 0x3ff; + color.uint32[3] = c0 >> 22; + break; + default: + INVALID_NUMBER_FORMAT_COMBO; + break; + } + break; + case AmdGpu::DataFormat::Format2_10_10_10: + switch (number_type) { + case AmdGpu::NumberFormat::Unorm: + color.float32[0] = NumberUtils::U10ToUnorm( c0 & 0x3ff); + color.float32[1] = NumberUtils::U10ToUnorm((c0 >> 10) & 0x3ff); + color.float32[2] = NumberUtils::U10ToUnorm((c0 >> 20) & 0x3ff); + color.float32[3] = NumberUtils::U2ToUnorm( c0 >> 30); + break; + case AmdGpu::NumberFormat::Snorm: + color.float32[0] = NumberUtils::S10ToSnorm( c0 & 0x3ff); + color.float32[1] = NumberUtils::S10ToSnorm((c0 >> 10) & 0x3ff); + color.float32[2] = NumberUtils::S10ToSnorm((c0 >> 20) & 0x3ff); + color.float32[3] = NumberUtils::S2ToSnorm( c0 >> 30); + break; + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + color.uint32[0] = c0 & 0x3ff; + color.uint32[1] = (c0 >> 10) & 0x3ff; + color.uint32[2] = (c0 >> 20) & 0x3ff; + color.uint32[3] = c0 >> 30; + break; + default: + INVALID_NUMBER_FORMAT_COMBO; + break; + } + break; + case AmdGpu::DataFormat::Format8_8_8_8: + switch (number_type) { + case AmdGpu::NumberFormat::Unorm: + case AmdGpu::NumberFormat::Srgb: // Should we handle gamma correction here? + color.float32[0] = NumberUtils::U8ToUnorm( c0 & 0xff); + color.float32[1] = NumberUtils::U8ToUnorm((c0 >> 8) & 0xff); + color.float32[2] = NumberUtils::U8ToUnorm((c0 >> 16) & 0xff); + color.float32[3] = NumberUtils::U8ToUnorm( c0 >> 24); + break; + case AmdGpu::NumberFormat::Snorm: + color.float32[0] = NumberUtils::S8ToSnorm( c0 & 0xff); + color.float32[1] = NumberUtils::S8ToSnorm((c0 >> 8) & 0xff); + color.float32[2] = NumberUtils::S8ToSnorm((c0 >> 16) & 0xff); + color.float32[3] = NumberUtils::S8ToSnorm( c0 >> 24); + break; + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + color.uint32[0] = c0 & 0xff; + color.uint32[1] = (c0 >> 8) & 0xff; + color.uint32[2] = (c0 >> 16) & 0xff; + color.uint32[3] = c0 >> 24; + break; + default: + INVALID_NUMBER_FORMAT_COMBO; + break; + } + break; + case AmdGpu::DataFormat::Format32_32: + switch (number_type) { + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + color.uint32[0] = c0; + color.uint32[1] = c1; + break; + case AmdGpu::NumberFormat::Float: + color.float32[0] = *(reinterpret_cast(&c0)); + color.float32[1] = *(reinterpret_cast(&c1)); + break; + default: + INVALID_NUMBER_FORMAT_COMBO; + break; + } + break; + case AmdGpu::DataFormat::Format16_16_16_16: + switch (number_type) { + case AmdGpu::NumberFormat::Unorm: + color.float32[0] = NumberUtils::U16ToUnorm( c0 & 0xffff); + color.float32[1] = NumberUtils::U16ToUnorm((c0 >> 16) & 0xffff); + color.float32[2] = NumberUtils::U16ToUnorm( c1 & 0xffff); + color.float32[3] = NumberUtils::U16ToUnorm((c1 >> 16) & 0xffff); + break; + case AmdGpu::NumberFormat::Snorm: + color.float32[0] = NumberUtils::S16ToSnorm( c0 & 0xffff); + color.float32[1] = NumberUtils::S16ToSnorm((c0 >> 16) & 0xffff); + color.float32[2] = NumberUtils::S16ToSnorm( c1 & 0xffff); + color.float32[3] = NumberUtils::S16ToSnorm((c1 >> 16) & 0xffff); + break; + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + color.uint32[0] = c0 & 0xffff; + color.uint32[1] = (c0 >> 16) & 0xffff; + color.uint32[2] = c1 & 0xffff; + color.uint32[3] = (c1 >> 16) & 0xffff; + break; + case AmdGpu::NumberFormat::Float: + color.float32[0] = NumberUtils::Uf16ToF32( c0 & 0xffff); + color.float32[1] = NumberUtils::Uf16ToF32((c0 >> 16) & 0xffff); + color.float32[2] = NumberUtils::Uf16ToF32( c1 & 0xffff); + color.float32[3] = NumberUtils::Uf16ToF32((c1 >> 16) & 0xffff); + break; + default: + INVALID_NUMBER_FORMAT_COMBO; + break; + } + break; + case AmdGpu::DataFormat::Format32_32_32_32: + switch (number_type) { + case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberFormat::Sint: + color.uint32[0] = c0; + color.uint32[1] = c0; + color.uint32[2] = c0; + color.uint32[3] = c1; + break; + case AmdGpu::NumberFormat::Float: + color.float32[0] = *(reinterpret_cast(&c0)); + color.float32[1] = *(reinterpret_cast(&c0)); + color.float32[2] = *(reinterpret_cast(&c0)); + color.float32[3] = *(reinterpret_cast(&c1)); + break; + default: + INVALID_NUMBER_FORMAT_COMBO; + break; } break; } - default: { - LOG_ERROR(Render_Vulkan, "Missing clear color conversion for type {}", - color_buffer.info.number_type.Value()); - break; - } - } + return {.color = color}; } diff --git a/src/video_core/renderer_vulkan/number_utils.cpp b/src/video_core/renderer_vulkan/number_utils.cpp new file mode 100644 index 000000000..b2361be41 --- /dev/null +++ b/src/video_core/renderer_vulkan/number_utils.cpp @@ -0,0 +1,162 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include + +#include "video_core/amdgpu/pixel_format.h" +#include "video_core/amdgpu/types.h" +#include "video_core/renderer_vulkan/number_utils.h" + +#define UF11_EXPONENT_SHIFT 6 +#define UF10_EXPONENT_SHIFT 5 + +#define RGB9E5_MANTISSA_BITS 9 +#define RGB9E5_EXP_BIAS 1 + +#define F32_INFINITY 0x7f800000 + +namespace Vulkan::NumberUtils { + +float Uf11ToF32(u16 val) { + union { + float f; + u32 ui; + } f32; + + int exponent = (val & 0x07c0) >> UF11_EXPONENT_SHIFT; + int mantissa = (val & 0x003f); + + f32.f = 0.0; + + if (exponent == 0) { + if (mantissa != 0) { + const float scale = 1.0 / (1 << 20); + f32.f = scale * mantissa; + } + } else if (exponent == 31) { + f32.ui = F32_INFINITY | mantissa; + } else { + float scale, decimal; + exponent -= 15; + if (exponent < 0) { + scale = 1.0f / (1 << -exponent); + } else { + scale = (float) (1 << exponent); + } + decimal = 1.0f + (float) mantissa / 64; + f32.f = scale * decimal; + } + + return f32.f; +} + +float Uf10ToF32(u16 val) { + union { + float f; + u32 ui; + } f32; + + int exponent = (val & 0x03e0) >> UF10_EXPONENT_SHIFT; + int mantissa = (val & 0x001f); + + f32.f = 0.0; + + if (exponent == 0) { + if (mantissa != 0) { + const float scale = 1.0 / (1 << 19); + f32.f = scale * mantissa; + } + } else if (exponent == 31) { + f32.ui = F32_INFINITY | mantissa; + } else { + float scale, decimal; + exponent -= 15; + if (exponent < 0) { + scale = 1.0f / (1 << -exponent); + } + else { + scale = (float) (1 << exponent); + } + decimal = 1.0f + (float) mantissa / 32; + f32.f = scale * decimal; + } + + return f32.f; +} + +float Uf16ToF32(u16 val) { + return half_float::half_cast(reinterpret_cast(val)); +} + +float U2ToUnorm(u8 val) { + static constexpr auto c = 1.0f / 3.0f; + return float(val * c); +} + +float S2ToSnorm(s8 val) { + static constexpr auto c = 1.0f / 1.0f; + return float(val * c); +} + +float U4ToUnorm(u8 val) { + static constexpr auto c = 1.0f / 15.0f; + return float(val * c); +} + +float S4ToSnorm(s8 val) { + static constexpr auto c = 1.0f / 7.0f; + return float(val * c); +} + +float U5ToUnorm(u8 val) { + static constexpr auto c = 1.0f / 31.0f; + return float(val * c); +} + +float S5ToSnorm(s8 val) { + static constexpr auto c = 1.0f / 15.0f; + return float(val * c); +} + +float U6ToUnorm(u8 val) { + static constexpr auto c = 1.0f / 63.0f; + return float(val * c); +} + +float S6ToSnorm(s8 val) { + static constexpr auto c = 1.0f / 31.0f; + return float(val * c); +} + +float U8ToUnorm(u8 val) { + static constexpr auto c = 1.0f / 255.0f; + return float(val * c); +} + +float S8ToSnorm(s8 val) { + static constexpr auto c = 1.0f / 127.0f; + return float(val * c); +} + +float U10ToUnorm(u16 val) { + static constexpr auto c = 1.0f / 1023.0f; + return float(val * c); +} + +float S10ToSnorm(s16 val) { + static constexpr auto c = 1.0f / 511.0f; + return float(val * c); +} + +float U16ToUnorm(u16 val) { + static constexpr auto c = 1.0f / 65535.0f; + return float(val * c); +} + +float S16ToSnorm(s16 val) { + static constexpr auto c = 1.0f / 32767.0f; + return float(val * c); +} + +} // namespace Vulkan::NumberUtils \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/number_utils.h b/src/video_core/renderer_vulkan/number_utils.h new file mode 100644 index 000000000..3f9e9d041 --- /dev/null +++ b/src/video_core/renderer_vulkan/number_utils.h @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" + +namespace Vulkan::NumberUtils { + +float Uf11ToF32(u16 val); +float Uf10ToF32(u16 val); +float Uf16ToF32(u16 val); +float U2ToUnorm(u8 val); +float S2ToSnorm(s8 val); +float U4ToUnorm(u8 val); +float S4ToSnorm(s8 val); +float U5ToUnorm(u8 val); +float S5ToSnorm(s8 val); +float U6ToUnorm(u8 val); +float S6ToSnorm(s8 val); +float U8ToUnorm(u8 val); +float S8ToSnorm(s8 val); +float U10ToUnorm(u16 val); +float S10ToSnorm(s16 val); +float U16ToUnorm(u16 val); +float S16ToSnorm(s16 val); + +} // namespace Vulkan::NumberUtils