From 03b1fef3318ce5f972e9d1c3590556e556343345 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 8 Apr 2025 07:22:16 -0700 Subject: [PATCH 01/49] renderer_vulkan: Only update dynamic state when changed. (#2751) --- .../renderer_vulkan/vk_rasterizer.cpp | 139 +++++------ .../renderer_vulkan/vk_rasterizer.h | 6 +- .../renderer_vulkan/vk_scheduler.cpp | 136 +++++++++++ src/video_core/renderer_vulkan/vk_scheduler.h | 219 ++++++++++++++++++ 4 files changed, 414 insertions(+), 86 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ecb0c0a75..600c205e3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -946,19 +946,19 @@ void Rasterizer::UnmapMemory(VAddr addr, u64 size) { mapped_ranges -= boost::icl::interval::right_open(addr, addr + size); } -void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) { +void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) const { UpdateViewportScissorState(); UpdateDepthStencilState(); - const auto& regs = liverpool->regs; - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.setBlendConstants(®s.blend_constants.red); - if (instance.IsDynamicColorWriteMaskSupported()) { - cmdbuf.setColorWriteMaskEXT(0, pipeline.GetWriteMasks()); - } + auto& dynamic_state = scheduler.GetDynamicState(); + dynamic_state.SetBlendConstants(&liverpool->regs.blend_constants.red); + dynamic_state.SetColorWriteMasks(pipeline.GetWriteMasks()); + + // Commit new dynamic state to the command buffer. + dynamic_state.Commit(instance, scheduler.CommandBuffer()); } -void Rasterizer::UpdateViewportScissorState() { +void Rasterizer::UpdateViewportScissorState() const { const auto& regs = liverpool->regs; const auto combined_scissor_value_tl = [](s16 scr, s16 win, s16 gen, s16 win_offset) { @@ -1071,92 +1071,65 @@ void Rasterizer::UpdateViewportScissorState() { scissors.push_back(empty_scissor); } - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.setViewportWithCountEXT(viewports); - cmdbuf.setScissorWithCountEXT(scissors); + auto& dynamic_state = scheduler.GetDynamicState(); + dynamic_state.SetViewports(viewports); + dynamic_state.SetScissors(scissors); } -void Rasterizer::UpdateDepthStencilState() { - auto& regs = liverpool->regs; - const auto cmdbuf = scheduler.CommandBuffer(); +void Rasterizer::UpdateDepthStencilState() const { + const auto& regs = liverpool->regs; + auto& dynamic_state = scheduler.GetDynamicState(); - bool depth_test = regs.depth_control.depth_enable && regs.depth_buffer.DepthValid(); - cmdbuf.setDepthTestEnableEXT(depth_test); - cmdbuf.setDepthWriteEnableEXT(regs.depth_control.depth_write_enable && - !regs.depth_render_control.depth_clear_enable); - if (depth_test) { - cmdbuf.setDepthCompareOpEXT(LiverpoolToVK::CompareOp(regs.depth_control.depth_func)); + const auto depth_test_enabled = + regs.depth_control.depth_enable && regs.depth_buffer.DepthValid(); + dynamic_state.SetDepthTestEnabled(depth_test_enabled); + if (depth_test_enabled) { + dynamic_state.SetDepthWriteEnabled(regs.depth_control.depth_write_enable && + !regs.depth_render_control.depth_clear_enable); + dynamic_state.SetDepthCompareOp(LiverpoolToVK::CompareOp(regs.depth_control.depth_func)); } - if (instance.IsDepthBoundsSupported()) { - cmdbuf.setDepthBoundsTestEnableEXT(regs.depth_control.depth_bounds_enable); - if (regs.depth_control.depth_bounds_enable) { - 
cmdbuf.setDepthBounds(regs.depth_bounds_min, regs.depth_bounds_max); - } + const auto depth_bounds_test_enabled = regs.depth_control.depth_bounds_enable; + dynamic_state.SetDepthBoundsTestEnabled(depth_bounds_test_enabled); + if (depth_bounds_test_enabled) { + dynamic_state.SetDepthBounds(regs.depth_bounds_min, regs.depth_bounds_max); } - cmdbuf.setDepthBiasEnableEXT(regs.polygon_control.NeedsBias()); - if (regs.polygon_control.enable_polygon_offset_front) { - cmdbuf.setDepthBias(regs.poly_offset.front_offset, regs.poly_offset.depth_bias, - regs.poly_offset.front_scale / 16.f); - } else if (regs.polygon_control.enable_polygon_offset_back) { - cmdbuf.setDepthBias(regs.poly_offset.back_offset, regs.poly_offset.depth_bias, - regs.poly_offset.back_scale / 16.f); + const auto depth_bias_enabled = regs.polygon_control.NeedsBias(); + if (depth_bias_enabled) { + dynamic_state.SetDepthBias( + regs.polygon_control.enable_polygon_offset_front ? regs.poly_offset.front_offset + : regs.poly_offset.back_offset, + regs.poly_offset.depth_bias, + (regs.polygon_control.enable_polygon_offset_front ? regs.poly_offset.front_scale + : regs.poly_offset.back_scale) / + 16.f); } - cmdbuf.setStencilTestEnableEXT(regs.depth_control.stencil_enable && - regs.depth_buffer.StencilValid()); - if (regs.depth_control.stencil_enable) { - const auto front_fail_op = - LiverpoolToVK::StencilOp(regs.stencil_control.stencil_fail_front); - const auto front_pass_op = - LiverpoolToVK::StencilOp(regs.stencil_control.stencil_zpass_front); - const auto front_depth_fail_op = - LiverpoolToVK::StencilOp(regs.stencil_control.stencil_zfail_front); - const auto front_compare_op = LiverpoolToVK::CompareOp(regs.depth_control.stencil_ref_func); - if (regs.depth_control.backface_enable) { - const auto back_fail_op = - LiverpoolToVK::StencilOp(regs.stencil_control.stencil_fail_back); - const auto back_pass_op = - LiverpoolToVK::StencilOp(regs.stencil_control.stencil_zpass_back); - const auto back_depth_fail_op = - LiverpoolToVK::StencilOp(regs.stencil_control.stencil_zfail_back); - const auto back_compare_op = - LiverpoolToVK::CompareOp(regs.depth_control.stencil_bf_func); - cmdbuf.setStencilOpEXT(vk::StencilFaceFlagBits::eFront, front_fail_op, front_pass_op, - front_depth_fail_op, front_compare_op); - cmdbuf.setStencilOpEXT(vk::StencilFaceFlagBits::eBack, back_fail_op, back_pass_op, - back_depth_fail_op, back_compare_op); - } else { - cmdbuf.setStencilOpEXT(vk::StencilFaceFlagBits::eFrontAndBack, front_fail_op, - front_pass_op, front_depth_fail_op, front_compare_op); - } + const auto stencil_test_enabled = + regs.depth_control.stencil_enable && regs.depth_buffer.StencilValid(); + dynamic_state.SetStencilTestEnabled(stencil_test_enabled); + if (stencil_test_enabled) { + const StencilOps front_ops{ + .fail_op = LiverpoolToVK::StencilOp(regs.stencil_control.stencil_fail_front), + .pass_op = LiverpoolToVK::StencilOp(regs.stencil_control.stencil_zpass_front), + .depth_fail_op = LiverpoolToVK::StencilOp(regs.stencil_control.stencil_zfail_front), + .compare_op = LiverpoolToVK::CompareOp(regs.depth_control.stencil_ref_func), + }; + const StencilOps back_ops = regs.depth_control.backface_enable ? 
StencilOps{ + .fail_op = LiverpoolToVK::StencilOp(regs.stencil_control.stencil_fail_back), + .pass_op = LiverpoolToVK::StencilOp(regs.stencil_control.stencil_zpass_back), + .depth_fail_op = LiverpoolToVK::StencilOp(regs.stencil_control.stencil_zfail_back), + .compare_op = LiverpoolToVK::CompareOp(regs.depth_control.stencil_bf_func), + } : front_ops; + dynamic_state.SetStencilOps(front_ops, back_ops); const auto front = regs.stencil_ref_front; - const auto back = regs.stencil_ref_back; - if (front.stencil_test_val == back.stencil_test_val) { - cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, - front.stencil_test_val); - } else { - cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front.stencil_test_val); - cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back.stencil_test_val); - } - - if (front.stencil_write_mask == back.stencil_write_mask) { - cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, - front.stencil_write_mask); - } else { - cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front.stencil_write_mask); - cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back.stencil_write_mask); - } - - if (front.stencil_mask == back.stencil_mask) { - cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, - front.stencil_mask); - } else { - cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front.stencil_mask); - cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back.stencil_mask); - } + const auto back = + regs.depth_control.backface_enable ? regs.stencil_ref_back : regs.stencil_ref_front; + dynamic_state.SetStencilReferences(front.stencil_test_val, back.stencil_test_val); + dynamic_state.SetStencilWriteMasks(front.stencil_write_mask, back.stencil_write_mask); + dynamic_state.SetStencilCompareMasks(front.stencil_mask, back.stencil_mask); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 8e5d0065b..02c24c7ec 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -75,9 +75,9 @@ private: void DepthStencilCopy(bool is_depth, bool is_stencil); void EliminateFastClear(); - void UpdateDynamicState(const GraphicsPipeline& pipeline); - void UpdateViewportScissorState(); - void UpdateDepthStencilState(); + void UpdateDynamicState(const GraphicsPipeline& pipeline) const; + void UpdateViewportScissorState() const; + void UpdateDepthStencilState() const; bool FilterDraw(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index fd84c54ed..6b872bdaa 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -97,6 +97,9 @@ void Scheduler::AllocateWorkerCommandBuffers() { ASSERT_MSG(begin_result == vk::Result::eSuccess, "Failed to begin command buffer: {}", vk::to_string(begin_result)); + // Invalidate dynamic state so it gets applied to the new command buffer. 
+ dynamic_state.Invalidate(); + #if TRACY_GPU_ENABLED auto* profiler_ctx = instance.GetProfilerContext(); if (profiler_ctx) { @@ -164,4 +167,137 @@ void Scheduler::SubmitExecution(SubmitInfo& info) { } } +void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf) { + if (dirty_state.viewports) { + dirty_state.viewports = false; + cmdbuf.setViewportWithCountEXT(viewports); + } + if (dirty_state.scissors) { + dirty_state.scissors = false; + cmdbuf.setScissorWithCountEXT(scissors); + } + if (dirty_state.depth_test_enabled) { + dirty_state.depth_test_enabled = false; + cmdbuf.setDepthTestEnableEXT(depth_test_enabled); + } + if (dirty_state.depth_write_enabled) { + dirty_state.depth_write_enabled = false; + // Note that this must be set in a command buffer even if depth test is disabled. + cmdbuf.setDepthWriteEnableEXT(depth_write_enabled); + } + if (depth_test_enabled && dirty_state.depth_compare_op) { + dirty_state.depth_compare_op = false; + cmdbuf.setDepthCompareOpEXT(depth_compare_op); + } + if (dirty_state.depth_bounds_test_enabled) { + dirty_state.depth_bounds_test_enabled = false; + if (instance.IsDepthBoundsSupported()) { + cmdbuf.setDepthBoundsTestEnableEXT(depth_bounds_test_enabled); + } + } + if (depth_bounds_test_enabled && dirty_state.depth_bounds) { + dirty_state.depth_bounds = false; + if (instance.IsDepthBoundsSupported()) { + cmdbuf.setDepthBounds(depth_bounds_min, depth_bounds_max); + } + } + if (dirty_state.depth_bias_enabled) { + dirty_state.depth_bias_enabled = false; + cmdbuf.setDepthBiasEnableEXT(depth_bias_enabled); + } + if (depth_bias_enabled && dirty_state.depth_bias) { + dirty_state.depth_bias = false; + cmdbuf.setDepthBias(depth_bias_constant, depth_bias_clamp, depth_bias_slope); + } + if (dirty_state.stencil_test_enabled) { + dirty_state.stencil_test_enabled = false; + cmdbuf.setStencilTestEnableEXT(stencil_test_enabled); + } + if (stencil_test_enabled) { + if (dirty_state.stencil_front_ops && dirty_state.stencil_back_ops && + stencil_front_ops == stencil_back_ops) { + dirty_state.stencil_front_ops = false; + dirty_state.stencil_back_ops = false; + cmdbuf.setStencilOpEXT(vk::StencilFaceFlagBits::eFrontAndBack, + stencil_front_ops.fail_op, stencil_front_ops.pass_op, + stencil_front_ops.depth_fail_op, stencil_front_ops.compare_op); + } else { + if (dirty_state.stencil_front_ops) { + dirty_state.stencil_front_ops = false; + cmdbuf.setStencilOpEXT(vk::StencilFaceFlagBits::eFront, stencil_front_ops.fail_op, + stencil_front_ops.pass_op, stencil_front_ops.depth_fail_op, + stencil_front_ops.compare_op); + } + if (dirty_state.stencil_back_ops) { + dirty_state.stencil_back_ops = false; + cmdbuf.setStencilOpEXT(vk::StencilFaceFlagBits::eBack, stencil_back_ops.fail_op, + stencil_back_ops.pass_op, stencil_back_ops.depth_fail_op, + stencil_back_ops.compare_op); + } + } + if (dirty_state.stencil_front_reference && dirty_state.stencil_back_reference && + stencil_front_reference == stencil_back_reference) { + dirty_state.stencil_front_reference = false; + dirty_state.stencil_back_reference = false; + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, + stencil_front_reference); + } else { + if (dirty_state.stencil_front_reference) { + dirty_state.stencil_front_reference = false; + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, + stencil_front_reference); + } + if (dirty_state.stencil_back_reference) { + dirty_state.stencil_back_reference = false; + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, 
stencil_back_reference); + } + } + if (dirty_state.stencil_front_write_mask && dirty_state.stencil_back_write_mask && + stencil_front_write_mask == stencil_back_write_mask) { + dirty_state.stencil_front_write_mask = false; + dirty_state.stencil_back_write_mask = false; + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, + stencil_front_write_mask); + } else { + if (dirty_state.stencil_front_write_mask) { + dirty_state.stencil_front_write_mask = false; + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, + stencil_front_write_mask); + } + if (dirty_state.stencil_back_write_mask) { + dirty_state.stencil_back_write_mask = false; + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, stencil_back_write_mask); + } + } + if (dirty_state.stencil_front_compare_mask && dirty_state.stencil_back_compare_mask && + stencil_front_compare_mask == stencil_back_compare_mask) { + dirty_state.stencil_front_compare_mask = false; + dirty_state.stencil_back_compare_mask = false; + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, + stencil_front_compare_mask); + } else { + if (dirty_state.stencil_front_compare_mask) { + dirty_state.stencil_front_compare_mask = false; + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, + stencil_front_compare_mask); + } + if (dirty_state.stencil_back_compare_mask) { + dirty_state.stencil_back_compare_mask = false; + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, + stencil_back_compare_mask); + } + } + } + if (dirty_state.blend_constants) { + dirty_state.blend_constants = false; + cmdbuf.setBlendConstants(blend_constants); + } + if (dirty_state.color_write_masks) { + dirty_state.color_write_masks = false; + if (instance.IsDynamicColorWriteMaskSupported()) { + cmdbuf.setColorWriteMaskEXT(0, color_write_masks); + } + } +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index fd5e68373..880bd4b04 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -7,6 +7,7 @@ #include #include "common/types.h" #include "common/unique_function.h" +#include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" @@ -55,6 +56,219 @@ struct SubmitInfo { } }; +using Viewports = boost::container::static_vector; +using Scissors = boost::container::static_vector; +using ColorWriteMasks = std::array; +struct StencilOps { + vk::StencilOp fail_op{}; + vk::StencilOp pass_op{}; + vk::StencilOp depth_fail_op{}; + vk::CompareOp compare_op{}; + + bool operator==(const StencilOps& other) const { + return fail_op == other.fail_op && pass_op == other.pass_op && + depth_fail_op == other.depth_fail_op && compare_op == other.compare_op; + } +}; +struct DynamicState { + struct { + bool viewports : 1; + bool scissors : 1; + + bool depth_test_enabled : 1; + bool depth_write_enabled : 1; + bool depth_compare_op : 1; + + bool depth_bounds_test_enabled : 1; + bool depth_bounds : 1; + + bool depth_bias_enabled : 1; + bool depth_bias : 1; + + bool stencil_test_enabled : 1; + bool stencil_front_ops : 1; + bool stencil_front_reference : 1; + bool stencil_front_write_mask : 1; + bool stencil_front_compare_mask : 1; + bool stencil_back_ops : 1; + bool stencil_back_reference : 1; + bool stencil_back_write_mask : 1; + bool stencil_back_compare_mask : 1; + + bool blend_constants : 1; + bool color_write_masks : 1; + } 
dirty_state{}; + + Viewports viewports{}; + Scissors scissors{}; + + bool depth_test_enabled{}; + bool depth_write_enabled{}; + vk::CompareOp depth_compare_op{}; + + bool depth_bounds_test_enabled{}; + float depth_bounds_min{}; + float depth_bounds_max{}; + + bool depth_bias_enabled{}; + float depth_bias_constant{}; + float depth_bias_clamp{}; + float depth_bias_slope{}; + + bool stencil_test_enabled{}; + StencilOps stencil_front_ops{}; + u32 stencil_front_reference{}; + u32 stencil_front_write_mask{}; + u32 stencil_front_compare_mask{}; + StencilOps stencil_back_ops{}; + u32 stencil_back_reference{}; + u32 stencil_back_write_mask{}; + u32 stencil_back_compare_mask{}; + + float blend_constants[4]{}; + ColorWriteMasks color_write_masks{}; + + /// Commits the dynamic state to the provided command buffer. + void Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf); + + /// Invalidates all dynamic state to be flushed into the next command buffer. + void Invalidate() { + std::memset(&dirty_state, 0xFF, sizeof(dirty_state)); + } + + void SetViewports(const Viewports& viewports_) { + if (!std::ranges::equal(viewports, viewports_)) { + viewports = viewports_; + dirty_state.viewports = true; + } + } + + void SetScissors(const Scissors& scissors_) { + if (!std::ranges::equal(scissors, scissors_)) { + scissors = scissors_; + dirty_state.scissors = true; + } + } + + void SetDepthTestEnabled(const bool enabled) { + if (depth_test_enabled != enabled) { + depth_test_enabled = enabled; + dirty_state.depth_test_enabled = true; + } + } + + void SetDepthWriteEnabled(const bool enabled) { + if (depth_write_enabled != enabled) { + depth_write_enabled = enabled; + dirty_state.depth_write_enabled = true; + } + } + + void SetDepthCompareOp(const vk::CompareOp compare_op) { + if (depth_compare_op != compare_op) { + depth_compare_op = compare_op; + dirty_state.depth_compare_op = true; + } + } + + void SetDepthBoundsTestEnabled(const bool enabled) { + if (depth_bounds_test_enabled != enabled) { + depth_bounds_test_enabled = enabled; + dirty_state.depth_bounds_test_enabled = true; + } + } + + void SetDepthBounds(const float min, const float max) { + if (depth_bounds_min != min || depth_bounds_max != max) { + depth_bounds_min = min; + depth_bounds_max = max; + dirty_state.depth_bounds = true; + } + } + + void SetDepthBiasEnabled(const bool enabled) { + if (depth_bias_enabled != enabled) { + depth_bias_enabled = enabled; + dirty_state.depth_bias_enabled = true; + } + } + + void SetDepthBias(const float constant, const float clamp, const float slope) { + if (depth_bias_constant != constant || depth_bias_clamp != clamp || + depth_bias_slope != slope) { + depth_bias_constant = constant; + depth_bias_clamp = clamp; + depth_bias_slope = slope; + dirty_state.depth_bias = true; + } + } + + void SetStencilTestEnabled(const bool enabled) { + if (stencil_test_enabled != enabled) { + stencil_test_enabled = enabled; + dirty_state.stencil_test_enabled = true; + } + } + + void SetStencilOps(const StencilOps& front_ops, const StencilOps& back_ops) { + if (stencil_front_ops != front_ops) { + stencil_front_ops = front_ops; + dirty_state.stencil_front_ops = true; + } + if (stencil_back_ops != back_ops) { + stencil_back_ops = back_ops; + dirty_state.stencil_back_ops = true; + } + } + + void SetStencilReferences(const u32 front_reference, const u32 back_reference) { + if (stencil_front_reference != front_reference) { + stencil_front_reference = front_reference; + dirty_state.stencil_front_reference = true; + } + if 
(stencil_back_reference != back_reference) { + stencil_back_reference = back_reference; + dirty_state.stencil_back_reference = true; + } + } + + void SetStencilWriteMasks(const u32 front_write_mask, const u32 back_write_mask) { + if (stencil_front_write_mask != front_write_mask) { + stencil_front_write_mask = front_write_mask; + dirty_state.stencil_front_write_mask = true; + } + if (stencil_back_write_mask != back_write_mask) { + stencil_back_write_mask = back_write_mask; + dirty_state.stencil_back_write_mask = true; + } + } + + void SetStencilCompareMasks(const u32 front_compare_mask, const u32 back_compare_mask) { + if (stencil_front_compare_mask != front_compare_mask) { + stencil_front_compare_mask = front_compare_mask; + dirty_state.stencil_front_compare_mask = true; + } + if (stencil_back_compare_mask != back_compare_mask) { + stencil_back_compare_mask = back_compare_mask; + dirty_state.stencil_back_compare_mask = true; + } + } + + void SetBlendConstants(const float blend_constants_[4]) { + if (!std::equal(blend_constants, std::end(blend_constants), blend_constants_)) { + std::memcpy(blend_constants, blend_constants_, sizeof(blend_constants)); + dirty_state.blend_constants = true; + } + } + + void SetColorWriteMasks(const ColorWriteMasks& color_write_masks_) { + if (!std::ranges::equal(color_write_masks, color_write_masks_)) { + color_write_masks = color_write_masks_; + dirty_state.color_write_masks = true; + } + } +}; + class Scheduler { public: explicit Scheduler(const Instance& instance); @@ -81,6 +295,10 @@ public: return render_state; } + DynamicState& GetDynamicState() { + return dynamic_state; + } + /// Returns the current command buffer. vk::CommandBuffer CommandBuffer() const { return current_cmdbuf; @@ -125,6 +343,7 @@ private: }; std::queue pending_ops; RenderState render_state; + DynamicState dynamic_state; bool is_rendering = false; tracy::VkCtxScope* profiler_scope{}; }; From 29656563259be924e04dca02e5f2a7c63f27beee Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Wed, 9 Apr 2025 00:54:39 -0700 Subject: [PATCH 02/49] build: Target same CPU architecture level as PS4. (#2763) --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f3d4468f..37492eeb3 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,9 +54,9 @@ else() endif() if (ARCHITECTURE STREQUAL "x86_64") - # Target Sandy Bridge as a reasonable subset of instructions supported by PS4 and host CPUs. - # Note that the native PS4 architecture 'btver2' has been attempted but causes issues with M1 CPUs. - add_compile_options(-march=sandybridge -mtune=generic) + # Target the same CPU architecture as the PS4, to maintain the same level of compatibility. + # Exclude SSE4a as it is only available on AMD CPUs. + add_compile_options(-march=btver2 -mtune=generic -mno-sse4a) endif() if (APPLE AND ARCHITECTURE STREQUAL "x86_64" AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64") From e3b1c041d0de657795e8753cea34da8481c61c4c Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Wed, 9 Apr 2025 00:59:33 -0700 Subject: [PATCH 03/49] documents: Update macOS version in quickstart guide. 
---
 documents/Quickstart/Quickstart.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/documents/Quickstart/Quickstart.md b/documents/Quickstart/Quickstart.md
index 9c6bc5a6f..55825ac7d 100644
--- a/documents/Quickstart/Quickstart.md
+++ b/documents/Quickstart/Quickstart.md
@@ -24,7 +24,7 @@ SPDX-License-Identifier: GPL-2.0-or-later
 - A CPU supporting the following instruction sets: MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, F16C, CLMUL, AES, BMI1, MOVBE, XSAVE, ABM
   - **Intel**: Haswell generation or newer
   - **AMD**: Jaguar generation or newer
-  - **Apple**: Rosetta 2 on macOS 15 or newer
+  - **Apple**: Rosetta 2 on macOS 15.4 or newer
 
 ### GPU

From 5abec2a2917a52cf01f2ef9f5c9e3e2656988383 Mon Sep 17 00:00:00 2001
From: Dmugetsu <168934208+diegolix29@users.noreply.github.com>
Date: Wed, 9 Apr 2025 18:06:54 -0600
Subject: [PATCH 04/49] Enabling Depth Bias Explicitly (#2766)

---
 src/video_core/renderer_vulkan/vk_rasterizer.cpp | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 600c205e3..5aae43cc8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -1096,14 +1096,13 @@ void Rasterizer::UpdateDepthStencilState() const {
     }
 
     const auto depth_bias_enabled = regs.polygon_control.NeedsBias();
+    dynamic_state.SetDepthBiasEnabled(depth_bias_enabled);
     if (depth_bias_enabled) {
+        const bool front = regs.polygon_control.enable_polygon_offset_front;
         dynamic_state.SetDepthBias(
-            regs.polygon_control.enable_polygon_offset_front ? regs.poly_offset.front_offset
-                                                             : regs.poly_offset.back_offset,
+            front ? regs.poly_offset.front_offset : regs.poly_offset.back_offset,
             regs.poly_offset.depth_bias,
-            (regs.polygon_control.enable_polygon_offset_front ? regs.poly_offset.front_scale
-                                                              : regs.poly_offset.back_scale) /
-                16.f);
+            (front ? regs.poly_offset.front_scale : regs.poly_offset.back_scale) / 16.f);
     }
 
     const auto stencil_test_enabled =

From da118e3bd9700499d094ea1eec00416e6af41654 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Tue, 11 Feb 2025 17:40:46 +0100
Subject: [PATCH 05/49] Dump IR program

---
 src/shader_recompiler/ir/program.cpp | 25 +++++++++++++++++++++----
 src/shader_recompiler/ir/program.h   |  2 +-
 src/shader_recompiler/recompiler.cpp |  2 ++
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/shader_recompiler/ir/program.cpp b/src/shader_recompiler/ir/program.cpp
index 7728a3ccb..aea14b15d 100644
--- a/src/shader_recompiler/ir/program.cpp
+++ b/src/shader_recompiler/ir/program.cpp
@@ -6,13 +6,30 @@
 
 #include 
 
+#include "common/config.h"
+#include "common/io_file.h"
+#include "common/path_util.h"
 #include "shader_recompiler/ir/basic_block.h"
 #include "shader_recompiler/ir/program.h"
 #include "shader_recompiler/ir/value.h"
 
 namespace Shader::IR {
 
-std::string DumpProgram(const Program& program) {
+void DumpProgram(const Program& program, const Info& info) {
+    using namespace Common::FS;
+
+    if (!Config::dumpShaders()) {
+        return;
+    }
+
+    const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
+    if (!std::filesystem::exists(dump_dir)) {
+        std::filesystem::create_directories(dump_dir);
+    }
+    const auto filename = fmt::format("{}_{:#018x}.irprogram.txt", info.stage, info.pgm_hash);
+    const auto file = IOFile{dump_dir / filename, FileAccessMode::Write, FileType::TextFile};
+
     size_t index{0};
     std::map<const Inst*, size_t> inst_to_index;
     std::map<const Block*, size_t> block_to_index;
@@ -21,11 +38,11 @@ std::string DumpProgram(const Program& program) {
         block_to_index.emplace(block, index);
         ++index;
     }
-    std::string ret;
+
     for (const auto& block : program.blocks) {
-        ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
+        std::string s = IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
+        file.WriteString(s);
     }
-    return ret;
 }
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h
index 84a1a2d40..9ede71215 100644
--- a/src/shader_recompiler/ir/program.h
+++ b/src/shader_recompiler/ir/program.h
@@ -21,6 +21,6 @@ struct Program {
     Info& info;
 };
 
-[[nodiscard]] std::string DumpProgram(const Program& program);
+void DumpProgram(const Program& program, const Info& info);
 
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp
index 5004e0beb..2a0f9a819 100644
--- a/src/shader_recompiler/recompiler.cpp
+++ b/src/shader_recompiler/recompiler.cpp
@@ -82,6 +82,8 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
     Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
     Shader::Optimization::CollectShaderInfoPass(program);
 
+    Shader::IR::DumpProgram(program, info);
+
     return program;
 }

From e10078335794d4ee452f57b4cd3634404cd5562e Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Mon, 17 Feb 2025 22:30:42 +0100
Subject: [PATCH 06/49] Handle non-immediate offset on S_LOAD_DWORD

---
 .../frontend/translate/scalar_memory.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
index 89426e080..c2e91b328 100644
--- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
@@ -39,21 +39,22 @@ void Translator::EmitScalarMemory(const GcnInst&
inst) { void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) { const auto& smrd = inst.control.smrd; - const u32 dword_offset = [&] -> u32 { + const IR::U32 dword_offset = [&] -> IR::U32 { if (smrd.imm) { - return smrd.offset; + return ir.Imm32(smrd.offset); } if (smrd.offset == SQ_SRC_LITERAL) { - return inst.src[1].code; + return ir.Imm32(inst.src[1].code); } - UNREACHABLE(); + return ir.GetScalarReg(IR::ScalarReg(smrd.offset)); }(); const IR::ScalarReg sbase{inst.src[0].code * 2}; const IR::Value base = ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1)); IR::ScalarReg dst_reg{inst.dst[0].code}; for (u32 i = 0; i < num_dwords; i++) { - ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i))); + const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i)); + ir.SetScalarReg(dst_reg++, ir.ReadConst(base, index)); } } From 85334a924a17b6cf0d64d4fa5db12fccb0a2a434 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 18 Feb 2025 18:29:37 +0100 Subject: [PATCH 07/49] ASL dumping --- CMakeLists.txt | 1 + .../ir/abstract_syntax_list.cpp | 30 +++++++++++++++++++ .../ir/abstract_syntax_list.h | 3 ++ src/shader_recompiler/ir/program.cpp | 15 +++++++--- 4 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/ir/abstract_syntax_list.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 37492eeb3..209b4ee91 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -842,6 +842,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp + src/shader_recompiler/ir/abstract_syntax_list.cpp src/shader_recompiler/ir/abstract_syntax_list.h src/shader_recompiler/ir/attribute.cpp src/shader_recompiler/ir/attribute.h diff --git a/src/shader_recompiler/ir/abstract_syntax_list.cpp b/src/shader_recompiler/ir/abstract_syntax_list.cpp new file mode 100644 index 000000000..28fa3505e --- /dev/null +++ b/src/shader_recompiler/ir/abstract_syntax_list.cpp @@ -0,0 +1,30 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "abstract_syntax_list.h" + +namespace Shader::IR { + +std::string DumpASLNode(const AbstractSyntaxNode& node, const std::map& block_to_index, const std::map& inst_to_index) { + switch (node.type) { + case AbstractSyntaxNode::Type::Block: + return fmt::format("Block: ${}", block_to_index.at(node.data.block)); + case AbstractSyntaxNode::Type::If: + return fmt::format("If: cond = %{}, body = ${}, merge = ${}", inst_to_index.at(node.data.if_node.cond.Inst()), block_to_index.at(node.data.if_node.body), block_to_index.at(node.data.if_node.merge)); + case AbstractSyntaxNode::Type::EndIf: + return fmt::format("EndIf: merge = ${}", block_to_index.at(node.data.end_if.merge)); + case AbstractSyntaxNode::Type::Loop: + return fmt::format("Loop: body = ${}, continue = ${}, merge = ${}", block_to_index.at(node.data.loop.body), block_to_index.at(node.data.loop.continue_block), block_to_index.at(node.data.loop.merge)); + case AbstractSyntaxNode::Type::Repeat: + return fmt::format("Repeat: cond = %{}, header = ${}, merge = ${}", inst_to_index.at(node.data.repeat.cond.Inst()), block_to_index.at(node.data.repeat.loop_header), block_to_index.at(node.data.repeat.merge)); + case AbstractSyntaxNode::Type::Break: + return fmt::format("Break: cond = %{}, merge = ${}, skip = ${}", 
inst_to_index.at(node.data.break_node.cond.Inst()), block_to_index.at(node.data.break_node.merge), block_to_index.at(node.data.break_node.skip)); + case AbstractSyntaxNode::Type::Return: + return "Return"; + case AbstractSyntaxNode::Type::Unreachable: + UNREACHABLE(); + }; + UNREACHABLE(); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/abstract_syntax_list.h b/src/shader_recompiler/ir/abstract_syntax_list.h index 313a23abc..b2a4f7b2a 100644 --- a/src/shader_recompiler/ir/abstract_syntax_list.h +++ b/src/shader_recompiler/ir/abstract_syntax_list.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include "shader_recompiler/ir/value.h" namespace Shader::IR { @@ -53,4 +54,6 @@ struct AbstractSyntaxNode { }; using AbstractSyntaxList = std::vector; +std::string DumpASLNode(const AbstractSyntaxNode& node, const std::map& block_to_index, const std::map& inst_to_index); + } // namespace Shader::IR diff --git a/src/shader_recompiler/ir/program.cpp b/src/shader_recompiler/ir/program.cpp index aea14b15d..3a41c579d 100644 --- a/src/shader_recompiler/ir/program.cpp +++ b/src/shader_recompiler/ir/program.cpp @@ -26,9 +26,8 @@ void DumpProgram(const Program& program, const Info& info) { if (!std::filesystem::exists(dump_dir)) { std::filesystem::create_directories(dump_dir); } - const auto filename = fmt::format("{}_{:#018x}.irprogram.txt", info.stage, info.pgm_hash); - const auto file = IOFile{dump_dir / filename, FileAccessMode::Write, FileType::TextFile}; - + const auto ir_filename = fmt::format("{}_{:#018x}.irprogram.txt", info.stage, info.pgm_hash); + const auto ir_file = IOFile{dump_dir / ir_filename, FileAccessMode::Write, FileType::TextFile}; size_t index{0}; std::map inst_to_index; @@ -41,7 +40,15 @@ void DumpProgram(const Program& program, const Info& info) { for (const auto& block : program.blocks) { std::string s = IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; - file.WriteString(s); + ir_file.WriteString(s); + } + + const auto asl_filename = fmt::format("{}_{:#018x}.asl.txt", info.stage, info.pgm_hash); + const auto asl_file = IOFile{dump_dir / asl_filename, FileAccessMode::Write, FileType::TextFile}; + + for (const auto& node : program.syntax_list) { + std::string s = IR::DumpASLNode(node, block_to_index, inst_to_index) + '\n'; + asl_file.WriteString(s); } } From 7e1159bdaf1235dc6c6c9ac9a7f0fd6decad53b8 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 20 Feb 2025 15:12:24 +0100 Subject: [PATCH 08/49] Fix unreacheable ASL dump --- src/shader_recompiler/ir/abstract_syntax_list.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/ir/abstract_syntax_list.cpp b/src/shader_recompiler/ir/abstract_syntax_list.cpp index 28fa3505e..42d2292a9 100644 --- a/src/shader_recompiler/ir/abstract_syntax_list.cpp +++ b/src/shader_recompiler/ir/abstract_syntax_list.cpp @@ -22,7 +22,7 @@ std::string DumpASLNode(const AbstractSyntaxNode& node, const std::map Date: Thu, 27 Feb 2025 19:13:50 +0100 Subject: [PATCH 09/49] Add conditional tree --- CMakeLists.txt | 2 + src/shader_recompiler/ir/basic_block.h | 20 +++++++ src/shader_recompiler/ir/conditional_tree.cpp | 59 +++++++++++++++++++ src/shader_recompiler/ir/conditional_tree.h | 12 ++++ src/shader_recompiler/recompiler.cpp | 3 + 5 files changed, 96 insertions(+) create mode 100644 src/shader_recompiler/ir/conditional_tree.cpp create mode 100644 src/shader_recompiler/ir/conditional_tree.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 209b4ee91..ffbbb030e 100755 --- 
a/CMakeLists.txt +++ b/CMakeLists.txt @@ -849,6 +849,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/basic_block.cpp src/shader_recompiler/ir/basic_block.h src/shader_recompiler/ir/condition.h + src/shader_recompiler/ir/conditional_tree.cpp + src/shader_recompiler/ir/conditional_tree.h src/shader_recompiler/ir/ir_emitter.cpp src/shader_recompiler/ir/ir_emitter.h src/shader_recompiler/ir/microinstruction.cpp diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h index 74a7d2c56..6e1b19229 100644 --- a/src/shader_recompiler/ir/basic_block.h +++ b/src/shader_recompiler/ir/basic_block.h @@ -11,6 +11,7 @@ #include "common/object_pool.h" #include "common/types.h" +#include "shader_recompiler/ir/abstract_syntax_list.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/value.h" @@ -18,6 +19,12 @@ namespace Shader::IR { class Block { public: + struct ConditionalData { + std::uint32_t depth; + const ConditionalData* parent; + const AbstractSyntaxNode* asl_node; + }; + using InstructionList = boost::intrusive::list; using size_type = InstructionList::size_type; using iterator = InstructionList::iterator; @@ -65,6 +72,16 @@ public: return imm_successors; } + // Set the conditional data for this block. + void SetConditionalData(const ConditionalData& data) { + cond_data = data; + } + + // Get the conditional data for this block. + [[nodiscard]] const ConditionalData& CondData() const { + return cond_data; + } + /// Intrusively store the host definition of this instruction. template void SetDefinition(T def) { @@ -164,6 +181,9 @@ private: /// Block immediate successors std::vector imm_successors; + // Conditional data + Block::ConditionalData cond_data; + /// Intrusively store if the block is sealed in the SSA pass. 
bool is_ssa_sealed{false}; diff --git a/src/shader_recompiler/ir/conditional_tree.cpp b/src/shader_recompiler/ir/conditional_tree.cpp new file mode 100644 index 000000000..ef00285d9 --- /dev/null +++ b/src/shader_recompiler/ir/conditional_tree.cpp @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/conditional_tree.h" +#include "shader_recompiler/ir/basic_block.h" + +#include + +namespace Shader::IR { + +static void AddConditionalTree(std::span asl_span, Block::ConditionalData* parent) { + const auto get_span = [&asl_span](AbstractSyntaxNode& node, Block* merge_block) -> std::span { + auto it = std::find_if(asl_span.begin(), asl_span.end(), + [&node, &merge_block](const AbstractSyntaxNode& n) { + return n.data.block == merge_block; + } + ); + ASSERT(it != asl_span.end()); + std::ptrdiff_t merge_index = std::distance(asl_span.begin(), it); + return std::span(&node + 1, asl_span.data() + merge_index); + }; + const Block::ConditionalData* copied_parent = nullptr; + for (auto it = asl_span.begin(); it < asl_span.end(); ++it) { + AbstractSyntaxNode& node = *it; + if (node.type == AbstractSyntaxNode::Type::If || node.type == AbstractSyntaxNode::Type::Loop) { + ASSERT(copied_parent); + Block* merge_block; + switch (node.type) { + case AbstractSyntaxNode::Type::If: + merge_block = node.data.if_node.merge; + break; + case AbstractSyntaxNode::Type::Loop: + merge_block = node.data.loop.merge; + break; + default: + UNREACHABLE(); + } + auto subspan = get_span(node, merge_block); + Block::ConditionalData cond{copied_parent->depth + 1, copied_parent, &node}; + AddConditionalTree(subspan, &cond); + it += subspan.size(); + } else if (node.type == AbstractSyntaxNode::Type::Block) { + Block* block = node.data.block; + if (!copied_parent) { + block->SetConditionalData(*parent); + copied_parent = &block->CondData(); + } else { + block->SetConditionalData(*copied_parent); + } + } + } +} + +void AddConditionalTreeFromASL(AbstractSyntaxList& syntax_list) { + Block::ConditionalData cond{0, nullptr, nullptr}; + AddConditionalTree(syntax_list, &cond); +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/ir/conditional_tree.h b/src/shader_recompiler/ir/conditional_tree.h new file mode 100644 index 000000000..9d330bc6d --- /dev/null +++ b/src/shader_recompiler/ir/conditional_tree.h @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/ir/abstract_syntax_list.h" + +namespace Shader::IR { + +void AddConditionalTreeFromASL(AbstractSyntaxList& syntax_list); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 2a0f9a819..765d18e05 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/frontend/control_flow_graph.h" +#include "shader_recompiler/ir/conditional_tree.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/structured_control_flow.h" #include "shader_recompiler/ir/passes/ir_passes.h" @@ -58,6 +59,8 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info program.info, runtime_info, profile); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); + + 
Shader::IR::AddConditionalTreeFromASL(program.syntax_list);
 
     // Run optimization passes
     Shader::Optimization::SsaRewritePass(program.post_order_blocks);

From 920efdf37c5b3e49e6adae231ebc2fc861e54028 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Fri, 28 Feb 2025 19:10:47 +0100
Subject: [PATCH 10/49] Usefulness of conditional tree

---
 src/shader_recompiler/ir/conditional_tree.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/shader_recompiler/ir/conditional_tree.cpp b/src/shader_recompiler/ir/conditional_tree.cpp
index ef00285d9..5e064c43d 100644
--- a/src/shader_recompiler/ir/conditional_tree.cpp
+++ b/src/shader_recompiler/ir/conditional_tree.cpp
@@ -6,6 +6,10 @@
 
 #include 
 
+// This can be used to get, for a given block, the list of conditions that
+// must be true for the block to be executed. It can also be useful for
+// determining the maximum number of times a block is executed.
+
 namespace Shader::IR {
 
 static void AddConditionalTree(std::span asl_span, Block::ConditionalData* parent) {

From 0f6912cf189b7b4bc61a9d8d5ec267e9bebd5bcd Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Sun, 2 Mar 2025 23:55:19 +0100
Subject: [PATCH 11/49] Subprogram creation

---
 CMakeLists.txt                          |   6 +-
 src/shader_recompiler/ir/basic_block.h  |  10 +-
 src/shader_recompiler/ir/subprogram.cpp | 225 ++++++++++++++++++++++
 src/shader_recompiler/ir/subprogram.h   |  37 ++++
 src/shader_recompiler/pools.h           |  26 +++
 src/shader_recompiler/recompiler.h      |  17 +-
 6 files changed, 303 insertions(+), 18 deletions(-)
 create mode 100644 src/shader_recompiler/ir/subprogram.cpp
 create mode 100644 src/shader_recompiler/ir/subprogram.h
 create mode 100644 src/shader_recompiler/pools.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ffbbb030e..e1b8c2920 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -778,6 +778,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
     src/shader_recompiler/recompiler.cpp
     src/shader_recompiler/recompiler.h
     src/shader_recompiler/info.h
+    src/shader_recompiler/pools.h
     src/shader_recompiler/params.h
     src/shader_recompiler/runtime_info.h
     src/shader_recompiler/specialization.h
@@ -863,8 +864,11 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
     src/shader_recompiler/ir/post_order.h
     src/shader_recompiler/ir/program.cpp
     src/shader_recompiler/ir/program.h
-    src/shader_recompiler/ir/reinterpret.h
-    src/shader_recompiler/ir/reg.h
+    src/shader_recompiler/ir/reg.h
+    src/shader_recompiler/ir/reinterpret.h
+    src/shader_recompiler/ir/srt_gvn_table.h
+    src/shader_recompiler/ir/subprogram.cpp
+    src/shader_recompiler/ir/subprogram.h
     src/shader_recompiler/ir/type.cpp
     src/shader_recompiler/ir/type.h
     src/shader_recompiler/ir/value.cpp
diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h
index 6e1b19229..865243835 100644
--- a/src/shader_recompiler/ir/basic_block.h
+++ b/src/shader_recompiler/ir/basic_block.h
@@ -20,7 +20,7 @@ namespace Shader::IR {
 class Block {
 public:
     struct ConditionalData {
-        std::uint32_t depth;
+        u32 depth;
         const ConditionalData* parent;
         const AbstractSyntaxNode* asl_node;
     };
@@ -71,6 +71,14 @@ public:
     [[nodiscard]] std::span ImmSuccessors() const noexcept {
         return imm_successors;
     }
+    // Returns if the block has a given immediate predecessor.
+    [[nodiscard]] bool HasImmPredecessor(const Block* block) const noexcept {
+        return std::ranges::find(imm_predecessors, block) != imm_predecessors.end();
+    }
+    // Returns if the block has a given immediate successor.
+ [[nodiscard]] bool HasImmSuccessor(const Block* block) const noexcept { + return std::ranges::find(imm_successors, block) != imm_successors.end(); + } // Set the conditional data for this block. void SetConditionalData(const ConditionalData& data) { diff --git a/src/shader_recompiler/ir/subprogram.cpp b/src/shader_recompiler/ir/subprogram.cpp new file mode 100644 index 000000000..5d55c1485 --- /dev/null +++ b/src/shader_recompiler/ir/subprogram.cpp @@ -0,0 +1,225 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include "shader_recompiler/ir/conditional_tree.h" +#include "shader_recompiler/ir/subprogram.h" +#include "shader_recompiler/ir/post_order.h" + +namespace Shader::IR { + +SubProgram::SubProgram(Program* super_program, Pools& pools) : super_program(super_program), pools(pools) {} + +Block* SubProgram::AddBlock(Block* orig_block) { + auto it = orig_block_to_block.find(orig_block); + if (it != orig_block_to_block.end()) { + return it->second; + } + auto block = pools.block_pool.Create(pools.inst_pool); + orig_block_to_block[orig_block] = block; + return block; +} + +Inst* SubProgram::AddInst(Inst* orig_inst) { + auto it = orig_inst_to_inst.find(orig_inst); + if (it != orig_inst_to_inst.end()) { + return it->second; + } + Block* block = AddBlock(orig_inst->GetParent()); + Inst inst(orig_inst->GetOpcode(), orig_inst->Flags()); + if (orig_inst->GetOpcode() == Opcode::Phi) { + AddPhi(orig_inst, &inst); + } else { + for (size_t i = 0; i < orig_inst->NumArgs(); ++i) { + SetArg(&inst, i, orig_inst->Arg(i)); + } + } + auto insertion_point = block->end(); + if (block->back().GetOpcode() == Opcode::ConditionRef) { + --insertion_point; + } + return &(*block->PrependNewInst(insertion_point, inst)); +} + +Block* SubProgram::GetBlock(Block* orig_block) { + auto it = orig_block_to_block.find(orig_block); + if (it != orig_block_to_block.end()) { + return it->second; + } + return nullptr; +} + +Inst* SubProgram::GetInst(Inst* orig_inst) { + auto it = orig_inst_to_inst.find(orig_inst); + if (it != orig_inst_to_inst.end()) { + return it->second; + } + return nullptr; +} + +Program SubProgram::GetSubProgram() { + Program sub_program(super_program->info); + BuildBlockListAndASL(sub_program); + sub_program.post_order_blocks = PostOrder(sub_program.syntax_list.front()); + AddConditionalTreeFromASL(sub_program.syntax_list); + for (Block* block : sub_program.blocks) { + block->SsaSeal(); + } + return sub_program; +} + +void SubProgram::AddPhi(Inst* orig_phi, Inst* phi) { + // Current IR only has Phis with 2 arguments. 
+ ASSERT(orig_phi->NumArgs() == 2); + Block* orig_block0 = orig_phi->PhiBlock(0); + Block* orig_block1 = orig_phi->PhiBlock(1); + Block* block0 = AddBlock(orig_block0); + Block* block1 = AddBlock(orig_block1); + const Value& arg0 = orig_phi->Arg(0); + const Value& arg1 = orig_phi->Arg(1); + AddPhiOperand(phi, block0, arg0); + AddPhiOperand(phi, block1, arg1); + const auto get_conds = [orig_block0, orig_block1]() -> std::pair { + const Block::ConditionalData& cond0 = orig_block0->CondData(); + const Block::ConditionalData& cond1 = orig_block1->CondData(); + if (cond0.depth > cond1.depth) { + return {cond0, cond1}; + } + return {cond1, cond0}; + }; + const auto& [start_cond, target_cond] = get_conds(); + const Block::ConditionalData* cond = &start_cond; + while (cond->depth > target_cond.depth) { + if (cond->asl_node->type == AbstractSyntaxNode::Type::If) { + AddInst(cond->asl_node->data.if_node.cond.InstRecursive()); + } else if (cond->asl_node->type == AbstractSyntaxNode::Type::Loop) { + AddInst(&cond->asl_node->data.loop.continue_block->back()); + } + if (orig_phi->GetParent()->CondData().asl_node == cond->asl_node) { + break; + } + cond = cond->parent; + } +} + +void SubProgram::SetArg(Inst* inst, size_t index, const Value& arg) { + if (arg.IsImmediate()) { + inst->SetArg(index, arg); + } else { + inst->SetArg(index, Value(AddInst(arg.InstRecursive()))); + } +} + +void SubProgram::AddPhiOperand(Inst* phi, Block* block, const Value& arg) { + if (arg.IsImmediate()) { + phi->AddPhiOperand(block, arg); + } else { + phi->AddPhiOperand(block, Value(AddInst(arg.InstRecursive()))); + } +} + +void SubProgram::BuildBlockListAndASL(Program& sub_program) { + boost::container::flat_set filter_blocks; + for (const AbstractSyntaxNode& orig_asl_node : super_program->syntax_list) { + AbstractSyntaxNode asl_node; + asl_node.type = orig_asl_node.type; + Block* orig_block = orig_asl_node.data.block; + switch (orig_asl_node.type) { + case AbstractSyntaxNode::Type::Block: { + Block* block = GetBlock(orig_block); + if (!block) { + continue; + } + if (!sub_program.syntax_list.empty()) { + Block* last_block = sub_program.blocks.back(); + if (!last_block->HasImmSuccessor(block)) { + last_block->AddBranch(block); + } + } + asl_node.data.block = block; + sub_program.blocks.push_back(block); + break; + } + case AbstractSyntaxNode::Type::If: { + Inst* cond = GetInst(orig_asl_node.data.if_node.cond.InstRecursive()); + if (!cond) { + continue; + } + Block* block = cond->GetParent(); + Block* merge_block = AddBlock(orig_asl_node.data.if_node.merge); + Block* body_block = AddBlock(orig_asl_node.data.if_node.body); + asl_node.data.if_node.cond = U1(Value(cond)); + asl_node.data.if_node.body = body_block; + asl_node.data.if_node.merge = merge_block; + block->AddBranch(merge_block); + block->AddBranch(body_block); + filter_blocks.insert(merge_block); + break; + } + case AbstractSyntaxNode::Type::EndIf: { + Block* merge_block = GetBlock(orig_asl_node.data.end_if.merge); + if (!filter_blocks.contains(merge_block)) { + continue; + } + asl_node.data.end_if.merge = merge_block; + break; + } + case AbstractSyntaxNode::Type::Loop: { + Block* continue_block = GetBlock(orig_asl_node.data.loop.continue_block); + if (!continue_block) { + continue; + } + if (continue_block->back().GetOpcode() != Opcode::ConditionRef) { + continue; + } + Block* merge_block = AddBlock(orig_asl_node.data.loop.merge); + asl_node.data.loop.body = AddBlock(orig_asl_node.data.loop.body); + asl_node.data.loop.continue_block = continue_block; + 
asl_node.data.loop.merge = merge_block; + filter_blocks.insert(merge_block); + break; + } + case AbstractSyntaxNode::Type::Repeat: { + Inst* cond = GetInst(orig_asl_node.data.repeat.cond.InstRecursive()); + if (!cond) { + continue; + } + Block* block = cond->GetParent(); + Block* merge_block = AddBlock(orig_asl_node.data.repeat.merge); + Block* loop_header_block = AddBlock(orig_asl_node.data.repeat.loop_header); + asl_node.data.repeat.cond = U1(Value(cond)); + asl_node.data.repeat.loop_header = loop_header_block; + asl_node.data.repeat.merge = merge_block; + block->AddBranch(loop_header_block); + block->AddBranch(merge_block); + break; + } + case AbstractSyntaxNode::Type::Break: { + Inst* cond = GetInst(orig_asl_node.data.break_node.cond.InstRecursive()); + if (!cond) { + continue; + } + Block* block = cond->GetParent(); + Block* merge_block = AddBlock(orig_asl_node.data.break_node.merge); + Block* skip_block = AddBlock(orig_asl_node.data.break_node.skip); + asl_node.data.break_node.cond = U1(Value(&block->back())); + asl_node.data.break_node.merge = merge_block; + asl_node.data.break_node.skip = skip_block; + block->AddBranch(merge_block); + block->AddBranch(skip_block); + break; + } + case AbstractSyntaxNode::Type::Unreachable: + continue; + default: + break; + } + sub_program.syntax_list.push_back(asl_node); + } + for (Block* block : sub_program.blocks) { + block->has_multiple_predecessors = block->ImmPredecessors().size() > 1; + } +} + +} // namespace Shader::IR \ No newline at end of file diff --git a/src/shader_recompiler/ir/subprogram.h b/src/shader_recompiler/ir/subprogram.h new file mode 100644 index 000000000..07970cbee --- /dev/null +++ b/src/shader_recompiler/ir/subprogram.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "shader_recompiler/pools.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/program.h" + +namespace Shader::IR { + +struct SubProgram { + SubProgram(Program* super_program, Pools& pools); + + Block* AddBlock(Block* orig_block); + Inst* AddInst(Inst* orig_inst); + + Block* GetBlock(Block* orig_block); + Inst* GetInst(Inst* orig_inst); + + Program GetSubProgram(); +private: + void AddPhi(Inst* orig_phi, Inst* phi); + + void SetArg(Inst* inst, size_t index, const Value& arg); + void AddPhiOperand(Inst* phi, Block* block, const Value& arg); + + void BuildBlockListAndASL(Program& sub_program); + + Program* super_program; + Pools& pools; + boost::container::flat_map orig_block_to_block; + boost::container::flat_map orig_inst_to_inst; +}; + +} // namespace Shader::IR diff --git a/src/shader_recompiler/pools.h b/src/shader_recompiler/pools.h new file mode 100644 index 000000000..e9d10e6f0 --- /dev/null +++ b/src/shader_recompiler/pools.h @@ -0,0 +1,26 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/object_pool.h" +#include "shader_recompiler/ir/basic_block.h" + +namespace Shader { + +struct Pools { + static constexpr u32 InstPoolSize = 8192; + static constexpr u32 BlockPoolSize = 32; + + Common::ObjectPool inst_pool; + Common::ObjectPool block_pool; + + explicit Pools() : inst_pool{InstPoolSize}, block_pool{BlockPoolSize} {} + + void ReleaseContents() { + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + } +}; + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.h 
b/src/shader_recompiler/recompiler.h index 8180c29b3..b863d0457 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -3,7 +3,7 @@ #pragma once -#include "common/object_pool.h" +#include "shader_recompiler/pools.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" @@ -12,21 +12,6 @@ namespace Shader { struct Profile; struct RuntimeInfo; -struct Pools { - static constexpr u32 InstPoolSize = 8192; - static constexpr u32 BlockPoolSize = 32; - - Common::ObjectPool inst_pool; - Common::ObjectPool block_pool; - - explicit Pools() : inst_pool{InstPoolSize}, block_pool{BlockPoolSize} {} - - void ReleaseContents() { - inst_pool.ReleaseContents(); - block_pool.ReleaseContents(); - } -}; - [[nodiscard]] IR::Program TranslateProgram(std::span code, Pools& pools, Info& info, RuntimeInfo& runtime_info, const Profile& profile); From 068573e9d66ab9f90a0a3847fd1c845c783a11ce Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 3 Mar 2025 00:04:19 +0100 Subject: [PATCH 12/49] clang-format --- .../ir/abstract_syntax_list.cpp | 24 +++++++++++++++---- .../ir/abstract_syntax_list.h | 6 +++-- src/shader_recompiler/ir/conditional_tree.cpp | 18 +++++++------- src/shader_recompiler/ir/program.cpp | 3 ++- src/shader_recompiler/ir/subprogram.cpp | 9 ++++--- src/shader_recompiler/ir/subprogram.h | 3 ++- src/shader_recompiler/recompiler.cpp | 4 ++-- src/shader_recompiler/recompiler.h | 2 +- 8 files changed, 46 insertions(+), 23 deletions(-) diff --git a/src/shader_recompiler/ir/abstract_syntax_list.cpp b/src/shader_recompiler/ir/abstract_syntax_list.cpp index 42d2292a9..6d8e37f19 100644 --- a/src/shader_recompiler/ir/abstract_syntax_list.cpp +++ b/src/shader_recompiler/ir/abstract_syntax_list.cpp @@ -5,20 +5,34 @@ namespace Shader::IR { -std::string DumpASLNode(const AbstractSyntaxNode& node, const std::map& block_to_index, const std::map& inst_to_index) { +std::string DumpASLNode(const AbstractSyntaxNode& node, + const std::map& block_to_index, + const std::map& inst_to_index) { switch (node.type) { case AbstractSyntaxNode::Type::Block: return fmt::format("Block: ${}", block_to_index.at(node.data.block)); case AbstractSyntaxNode::Type::If: - return fmt::format("If: cond = %{}, body = ${}, merge = ${}", inst_to_index.at(node.data.if_node.cond.Inst()), block_to_index.at(node.data.if_node.body), block_to_index.at(node.data.if_node.merge)); + return fmt::format("If: cond = %{}, body = ${}, merge = ${}", + inst_to_index.at(node.data.if_node.cond.Inst()), + block_to_index.at(node.data.if_node.body), + block_to_index.at(node.data.if_node.merge)); case AbstractSyntaxNode::Type::EndIf: return fmt::format("EndIf: merge = ${}", block_to_index.at(node.data.end_if.merge)); case AbstractSyntaxNode::Type::Loop: - return fmt::format("Loop: body = ${}, continue = ${}, merge = ${}", block_to_index.at(node.data.loop.body), block_to_index.at(node.data.loop.continue_block), block_to_index.at(node.data.loop.merge)); + return fmt::format("Loop: body = ${}, continue = ${}, merge = ${}", + block_to_index.at(node.data.loop.body), + block_to_index.at(node.data.loop.continue_block), + block_to_index.at(node.data.loop.merge)); case AbstractSyntaxNode::Type::Repeat: - return fmt::format("Repeat: cond = %{}, header = ${}, merge = ${}", inst_to_index.at(node.data.repeat.cond.Inst()), block_to_index.at(node.data.repeat.loop_header), block_to_index.at(node.data.repeat.merge)); + return fmt::format("Repeat: cond = %{}, header = ${}, merge = ${}", + 
    case AbstractSyntaxNode::Type::Repeat:
-        return fmt::format("Repeat: cond = %{}, header = ${}, merge = ${}", inst_to_index.at(node.data.repeat.cond.Inst()), block_to_index.at(node.data.repeat.loop_header), block_to_index.at(node.data.repeat.merge));
+        return fmt::format("Repeat: cond = %{}, header = ${}, merge = ${}",
+                           inst_to_index.at(node.data.repeat.cond.Inst()),
+                           block_to_index.at(node.data.repeat.loop_header),
+                           block_to_index.at(node.data.repeat.merge));
    case AbstractSyntaxNode::Type::Break:
-        return fmt::format("Break: cond = %{}, merge = ${}, skip = ${}", inst_to_index.at(node.data.break_node.cond.Inst()), block_to_index.at(node.data.break_node.merge), block_to_index.at(node.data.break_node.skip));
+        return fmt::format("Break: cond = %{}, merge = ${}, skip = ${}",
+                           inst_to_index.at(node.data.break_node.cond.Inst()),
+                           block_to_index.at(node.data.break_node.merge),
+                           block_to_index.at(node.data.break_node.skip));
    case AbstractSyntaxNode::Type::Return:
        return "Return";
    case AbstractSyntaxNode::Type::Unreachable:
diff --git a/src/shader_recompiler/ir/abstract_syntax_list.h b/src/shader_recompiler/ir/abstract_syntax_list.h
index b2a4f7b2a..a620baccb 100644
--- a/src/shader_recompiler/ir/abstract_syntax_list.h
+++ b/src/shader_recompiler/ir/abstract_syntax_list.h
@@ -3,8 +3,8 @@
 #pragma once
-#include <string>
 #include <map>
+#include <string>
 #include "shader_recompiler/ir/value.h"
 namespace Shader::IR {
@@ -54,6 +54,8 @@ struct AbstractSyntaxNode {
 };
 using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;
-std::string DumpASLNode(const AbstractSyntaxNode& node, const std::map<const Block*, size_t>& block_to_index, const std::map<const Inst*, size_t>& inst_to_index);
+std::string DumpASLNode(const AbstractSyntaxNode& node,
+                        const std::map<const Block*, size_t>& block_to_index,
+                        const std::map<const Inst*, size_t>& inst_to_index);
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/ir/conditional_tree.cpp b/src/shader_recompiler/ir/conditional_tree.cpp
index 5e064c43d..355e1aba2 100644
--- a/src/shader_recompiler/ir/conditional_tree.cpp
+++ b/src/shader_recompiler/ir/conditional_tree.cpp
@@ -1,8 +1,8 @@
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
-#include "shader_recompiler/ir/conditional_tree.h"
 #include "shader_recompiler/ir/basic_block.h"
+#include "shader_recompiler/ir/conditional_tree.h"
 #include
@@ -12,13 +12,14 @@ namespace Shader::IR {
-static void AddConditionalTree(std::span<AbstractSyntaxNode> asl_span, Block::ConditionalData* parent) {
-    const auto get_span = [&asl_span](AbstractSyntaxNode& node, Block* merge_block) -> std::span<AbstractSyntaxNode> {
+static void AddConditionalTree(std::span<AbstractSyntaxNode> asl_span,
+                               Block::ConditionalData* parent) {
+    const auto get_span = [&asl_span](AbstractSyntaxNode& node,
+                                      Block* merge_block) -> std::span<AbstractSyntaxNode> {
        auto it = std::find_if(asl_span.begin(), asl_span.end(),
-            [&node, &merge_block](const AbstractSyntaxNode& n) {
-                return n.data.block == merge_block;
-            }
-        );
+                               [&node, &merge_block](const AbstractSyntaxNode& n) {
+                                   return n.data.block == merge_block;
+                               });
        ASSERT(it != asl_span.end());
        std::ptrdiff_t merge_index = std::distance(asl_span.begin(), it);
        return std::span(&node + 1, asl_span.data() + merge_index);
    };
    const Block::ConditionalData* copied_parent = nullptr;
    for (auto it = asl_span.begin(); it < asl_span.end(); ++it) {
        AbstractSyntaxNode& node = *it;
-        if (node.type == AbstractSyntaxNode::Type::If || node.type == AbstractSyntaxNode::Type::Loop) {
+        if (node.type == AbstractSyntaxNode::Type::If ||
+            node.type == AbstractSyntaxNode::Type::Loop) {
            ASSERT(copied_parent);
            Block* merge_block;
            switch (node.type) {
diff --git a/src/shader_recompiler/ir/program.cpp b/src/shader_recompiler/ir/program.cpp
index 3a41c579d..4071c9ac9 100644
--- a/src/shader_recompiler/ir/program.cpp
+++ b/src/shader_recompiler/ir/program.cpp
@@ -44,7 +44,8 @@ void DumpProgram(const Program&
program, const Info& info) { } const auto asl_filename = fmt::format("{}_{:#018x}.asl.txt", info.stage, info.pgm_hash); - const auto asl_file = IOFile{dump_dir / asl_filename, FileAccessMode::Write, FileType::TextFile}; + const auto asl_file = + IOFile{dump_dir / asl_filename, FileAccessMode::Write, FileType::TextFile}; for (const auto& node : program.syntax_list) { std::string s = IR::DumpASLNode(node, block_to_index, inst_to_index) + '\n'; diff --git a/src/shader_recompiler/ir/subprogram.cpp b/src/shader_recompiler/ir/subprogram.cpp index 5d55c1485..ea8223e0e 100644 --- a/src/shader_recompiler/ir/subprogram.cpp +++ b/src/shader_recompiler/ir/subprogram.cpp @@ -4,12 +4,13 @@ #include #include #include "shader_recompiler/ir/conditional_tree.h" -#include "shader_recompiler/ir/subprogram.h" #include "shader_recompiler/ir/post_order.h" +#include "shader_recompiler/ir/subprogram.h" namespace Shader::IR { -SubProgram::SubProgram(Program* super_program, Pools& pools) : super_program(super_program), pools(pools) {} +SubProgram::SubProgram(Program* super_program, Pools& pools) + : super_program(super_program), pools(pools) {} Block* SubProgram::AddBlock(Block* orig_block) { auto it = orig_block_to_block.find(orig_block); @@ -80,7 +81,9 @@ void SubProgram::AddPhi(Inst* orig_phi, Inst* phi) { const Value& arg1 = orig_phi->Arg(1); AddPhiOperand(phi, block0, arg0); AddPhiOperand(phi, block1, arg1); - const auto get_conds = [orig_block0, orig_block1]() -> std::pair { + const auto get_conds = + [orig_block0, + orig_block1]() -> std::pair { const Block::ConditionalData& cond0 = orig_block0->CondData(); const Block::ConditionalData& cond1 = orig_block1->CondData(); if (cond0.depth > cond1.depth) { diff --git a/src/shader_recompiler/ir/subprogram.h b/src/shader_recompiler/ir/subprogram.h index 07970cbee..50f833fc9 100644 --- a/src/shader_recompiler/ir/subprogram.h +++ b/src/shader_recompiler/ir/subprogram.h @@ -4,9 +4,9 @@ #pragma once #include -#include "shader_recompiler/pools.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/pools.h" namespace Shader::IR { @@ -20,6 +20,7 @@ struct SubProgram { Inst* GetInst(Inst* orig_inst); Program GetSubProgram(); + private: void AddPhi(Inst* orig_phi, Inst* phi); diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 765d18e05..b02ec706c 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -2,9 +2,9 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/frontend/control_flow_graph.h" -#include "shader_recompiler/ir/conditional_tree.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/structured_control_flow.h" +#include "shader_recompiler/ir/conditional_tree.h" #include "shader_recompiler/ir/passes/ir_passes.h" #include "shader_recompiler/ir/post_order.h" #include "shader_recompiler/recompiler.h" @@ -59,7 +59,7 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info program.info, runtime_info, profile); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); - + Shader::IR::AddConditionalTreeFromASL(program.syntax_list); // Run optimization passes diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index b863d0457..fc42b29d1 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -3,9 +3,9 @@ #pragma 
once
-#include "shader_recompiler/pools.h"
 #include "shader_recompiler/ir/basic_block.h"
 #include "shader_recompiler/ir/program.h"
+#include "shader_recompiler/pools.h"

 namespace Shader {

From c1a577b5987e0424a02edc7919ba4f7eb73bc89a Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Tue, 4 Mar 2025 19:04:43 +0100
Subject: [PATCH 13/49] Fix subprogram generation
---
 src/shader_recompiler/ir/subprogram.cpp | 11 +++++++++--
 src/shader_recompiler/ir/subprogram.h | 1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/shader_recompiler/ir/subprogram.cpp b/src/shader_recompiler/ir/subprogram.cpp
index ea8223e0e..a247c8c01 100644
--- a/src/shader_recompiler/ir/subprogram.cpp
+++ b/src/shader_recompiler/ir/subprogram.cpp
@@ -7,6 +7,11 @@
 #include "shader_recompiler/ir/post_order.h"
 #include "shader_recompiler/ir/subprogram.h"
+// Given an IR program, this class is used to create a subprogram that contains
+// only the blocks and instructions that are relevant to a given group of
+// instructions, taking into account only those instructions, the instructions
+// they use and their conditions.
+
 namespace Shader::IR {
 SubProgram::SubProgram(Program* super_program, Pools& pools)
     : super_program(super_program), pools(pools) {}
@@ -60,6 +65,8 @@ Inst* SubProgram::GetInst(Inst* orig_inst) {
 }
 Program SubProgram::GetSubProgram() {
+    ASSERT_MSG(!completed, "SubProgram already completed");
+    completed = true;
     Program sub_program(super_program->info);
     BuildBlockListAndASL(sub_program);
     sub_program.post_order_blocks = PostOrder(sub_program.syntax_list.front());
@@ -155,8 +162,8 @@ void SubProgram::BuildBlockListAndASL(Program& sub_program) {
             asl_node.data.if_node.cond = U1(Value(cond));
             asl_node.data.if_node.body = body_block;
             asl_node.data.if_node.merge = merge_block;
-            block->AddBranch(merge_block);
             block->AddBranch(body_block);
+            block->AddBranch(merge_block);
             filter_blocks.insert(merge_block);
             break;
         }
@@ -209,8 +216,8 @@ void SubProgram::BuildBlockListAndASL(Program& sub_program) {
             asl_node.data.break_node.cond = U1(Value(&block->back()));
             asl_node.data.break_node.merge = merge_block;
             asl_node.data.break_node.skip = skip_block;
-            block->AddBranch(merge_block);
             block->AddBranch(skip_block);
+            block->AddBranch(merge_block);
             break;
         }
         case AbstractSyntaxNode::Type::Unreachable:
diff --git a/src/shader_recompiler/ir/subprogram.h b/src/shader_recompiler/ir/subprogram.h
index 50f833fc9..c9cd4ff1f 100644
--- a/src/shader_recompiler/ir/subprogram.h
+++ b/src/shader_recompiler/ir/subprogram.h
@@ -29,6 +29,7 @@ private:
 void BuildBlockListAndASL(Program& sub_program);
+    bool completed = false;
 Program* super_program;
 Pools& pools;

From eebd557efc9fba5411ae54d2613a1c64a27ca93f Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Thu, 6 Mar 2025 01:02:39 +0100
Subject: [PATCH 14/49] ImmValue
---
 CMakeLists.txt | 2 +
 .../ir/compute_value/imm_value.cpp | 1106 +++++++++++++++++
 .../ir/compute_value/imm_value.h | 345 +++++
 3 files changed, 1453 insertions(+)
 create mode 100644 src/shader_recompiler/ir/compute_value/imm_value.cpp
 create mode 100644 src/shader_recompiler/ir/compute_value/imm_value.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e1b8c2920..041642840 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -843,6 +843,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
 src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
 src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp
 src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
+
src/shader_recompiler/ir/compute_value/imm_value.cpp + src/shader_recompiler/ir/compute_value/imm_value.h src/shader_recompiler/ir/abstract_syntax_list.cpp src/shader_recompiler/ir/abstract_syntax_list.h src/shader_recompiler/ir/attribute.cpp diff --git a/src/shader_recompiler/ir/compute_value/imm_value.cpp b/src/shader_recompiler/ir/compute_value/imm_value.cpp new file mode 100644 index 000000000..f222ea009 --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/imm_value.cpp @@ -0,0 +1,1106 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/hash.h" +#include "shader_recompiler/ir/compute_value/imm_value.h" + +namespace Shader::IR { + +ImmValue::ImmValue(const IR::Value& value) noexcept { + IR::Value resolved = value.Resolve(); + type = resolved.Type(); + switch (type) { + case Type::U1: + imm_values[0].imm_u1 = resolved.U1(); + break; + case Type::U8: + imm_values[0].imm_u8 = resolved.U8(); + break; + case Type::U16: + imm_values[0].imm_u16 = resolved.U16(); + break; + case Type::U32: + imm_values[0].imm_u32 = resolved.U32(); + break; + case Type::F32: + imm_values[0].imm_f32 = resolved.F32(); + break; + case Type::U64: + imm_values[0].imm_u64 = resolved.U64(); + break; + case Type::F64: + imm_values[0].imm_f64 = resolved.F64(); + break; + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue::ImmValue(bool value) noexcept : type{Type::U1}, is_signed{false} { + imm_values[0].imm_u1 = value; +} + +ImmValue::ImmValue(u8 value) noexcept : type{Type::U8}, is_signed{false} { + imm_values[0].imm_u8 = value; +} + +ImmValue::ImmValue(s8 value) noexcept : type{Type::U8}, is_signed{true} { + imm_values[0].imm_s8 = value; +} + +ImmValue::ImmValue(u16 value) noexcept : type{Type::U16}, is_signed{false} { + imm_values[0].imm_u16 = value; +} + +ImmValue::ImmValue(s16 value) noexcept : type{Type::U16}, is_signed{true} { + imm_values[0].imm_s16 = value; +} + +ImmValue::ImmValue(u32 value) noexcept : type{Type::U32}, is_signed{false} { + imm_values[0].imm_u32 = value; +} + +ImmValue::ImmValue(s32 value) noexcept : type{Type::U32}, is_signed{true} { + imm_values[0].imm_s32 = value; +} + +ImmValue::ImmValue(f32 value) noexcept : type{Type::F32}, is_signed{true} { + imm_values[0].imm_f32 = value; +} + +ImmValue::ImmValue(u64 value) noexcept : type{Type::U64}, is_signed{false} { + imm_values[0].imm_u64 = value; +} + +ImmValue::ImmValue(s64 value) noexcept : type{Type::U64}, is_signed{true} { + imm_values[0].imm_s64 = value; +} + +ImmValue::ImmValue(f64 value) noexcept : type{Type::F64}, is_signed{true} { + imm_values[0].imm_f64 = value; +} + +ImmValue::ImmValue(u32 value1, u32 value2) noexcept : type{Type::U32x2}, is_signed{false} { + imm_values[0].imm_u32 = value1; + imm_values[1].imm_u32 = value2; +} + +ImmValue::ImmValue(u32 value1, u32 value2, u32 value3) noexcept + : type{Type::U32x3}, is_signed{false} { + imm_values[0].imm_u32 = value1; + imm_values[1].imm_u32 = value2; + imm_values[2].imm_u32 = value3; +} + +ImmValue::ImmValue(u32 value1, u32 value2, u32 value3, u32 value4) noexcept + : type{Type::U32x4}, is_signed{false} { + imm_values[0].imm_u32 = value1; + imm_values[1].imm_u32 = value2; + imm_values[2].imm_u32 = value3; + imm_values[3].imm_u32 = value4; +} + +ImmValue::ImmValue(s32 value1, s32 value2) noexcept : type{Type::U32x2}, is_signed{true} { + imm_values[0].imm_s32 = value1; + imm_values[1].imm_s32 = value2; +} + +ImmValue::ImmValue(s32 value1, s32 value2, s32 value3) noexcept + : 
type{Type::U32x3}, is_signed{true} { + imm_values[0].imm_s32 = value1; + imm_values[1].imm_s32 = value2; + imm_values[2].imm_s32 = value3; +} + +ImmValue::ImmValue(s32 value1, s32 value2, s32 value3, s32 value4) noexcept + : type{Type::U32x4}, is_signed{true} { + imm_values[0].imm_s32 = value1; + imm_values[1].imm_s32 = value2; + imm_values[2].imm_s32 = value3; + imm_values[3].imm_s32 = value4; +} + +ImmValue::ImmValue(f32 value1, f32 value2) noexcept : type{Type::F32x2}, is_signed{true} { + imm_values[0].imm_f32 = value1; + imm_values[1].imm_f32 = value2; +} + +ImmValue::ImmValue(f32 value1, f32 value2, f32 value3) noexcept + : type{Type::F32x3}, is_signed{true} { + imm_values[0].imm_f32 = value1; + imm_values[1].imm_f32 = value2; + imm_values[2].imm_f32 = value3; +} + +ImmValue::ImmValue(f32 value1, f32 value2, f32 value3, f32 value4) noexcept + : type{Type::F32x4}, is_signed{true} { + imm_values[0].imm_f32 = value1; + imm_values[1].imm_f32 = value2; + imm_values[2].imm_f32 = value3; + imm_values[3].imm_f32 = value4; +} + +ImmValue::ImmValue(f64 value1, f64 value2) noexcept : type{Type::F64x2}, is_signed{true} { + imm_values[0].imm_f64 = value1; + imm_values[1].imm_f64 = value2; +} + +ImmValue::ImmValue(f64 value1, f64 value2, f64 value3) noexcept + : type{Type::F64x3}, is_signed{true} { + imm_values[0].imm_f64 = value1; + imm_values[1].imm_f64 = value2; + imm_values[2].imm_f64 = value3; +} + +ImmValue::ImmValue(f64 value1, f64 value2, f64 value3, f64 value4) noexcept + : type{Type::F64x4}, is_signed{true} { + imm_values[0].imm_f64 = value1; + imm_values[1].imm_f64 = value2; + imm_values[2].imm_f64 = value3; + imm_values[3].imm_f64 = value4; +} + +IR::Type ImmValue::BaseType() const noexcept { + switch (type) { + case Type::U1: + return Type::U1; + case Type::U8: + return Type::U8; + case Type::U16: + return Type::U16; + case Type::U32: + case Type::U32x2: + case Type::U32x3: + case Type::U32x4: + return Type::U32; + case Type::U64: + return Type::U64; + case Type::F32: + case Type::F32x2: + case Type::F32x3: + case Type::F32x4: + return Type::F32; + case Type::F64: + case Type::F64x2: + case Type::F64x3: + case Type::F64x4: + return Type::F64; + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +u32 ImmValue::Dimensions() const noexcept { + switch (type) { + case Type::U1: + case Type::U8: + case Type::U16: + case Type::U32: + case Type::U64: + case Type::F32: + case Type::F64: + return 1; + case Type::U32x2: + case Type::F32x2: + case Type::F64x2: + return 2; + case Type::U32x3: + case Type::F32x3: + case Type::F64x3: + return 3; + case Type::U32x4: + case Type::F32x4: + case Type::F64x4: + return 4; + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +bool ImmValue::IsSigned() const noexcept { + return is_signed; +} + +void ImmValue::SetSigned(bool signed_) noexcept { + is_signed = signed_; +} + +void ImmValue::SameSignAs(const ImmValue& other) noexcept { + SetSigned(other.IsSigned()); +} + +bool ImmValue::operator==(const ImmValue& other) const noexcept { + if (type != other.type) { + return false; + } + switch (type) { + case Type::U1: + return imm_values[0].imm_u1 == other.imm_values[0].imm_u1; + case Type::U8: + return imm_values[0].imm_u8 == other.imm_values[0].imm_u8; + case Type::U16: + return imm_values[0].imm_u16 == other.imm_values[0].imm_u16; + case Type::U32: + case Type::F32: + return imm_values[0].imm_u32 == other.imm_values[0].imm_u32; + case Type::U64: + case Type::F64: + return imm_values[0].imm_u64 == other.imm_values[0].imm_u64; + case 
Type::U32x2: + case Type::F32x2: + case Type::F64x2: + return imm_values[0].imm_u32 == other.imm_values[0].imm_u32 && + imm_values[1].imm_u32 == other.imm_values[1].imm_u32; + case Type::U32x3: + case Type::F32x3: + case Type::F64x3: + return imm_values[0].imm_u32 == other.imm_values[0].imm_u32 && + imm_values[1].imm_u32 == other.imm_values[1].imm_u32 && + imm_values[2].imm_u32 == other.imm_values[2].imm_u32; + case Type::U32x4: + case Type::F32x4: + case Type::F64x4: + return imm_values[0].imm_u32 == other.imm_values[0].imm_u32 && + imm_values[1].imm_u32 == other.imm_values[1].imm_u32 && + imm_values[2].imm_u32 == other.imm_values[2].imm_u32 && + imm_values[3].imm_u32 == other.imm_values[3].imm_u32; + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +bool ImmValue::operator!=(const ImmValue& other) const noexcept { + return !operator==(other); +} + +bool ImmValue::operator<(const ImmValue& other) const noexcept { + ASSERT(type == other.type); + switch (type) { + case Type::U8: + return is_signed && other.is_signed ? imm_values[0].imm_s8 < other.imm_values[0].imm_s8 + : imm_values[0].imm_u8 < other.imm_values[0].imm_u8; + case Type::U16: + return is_signed && other.is_signed ? imm_values[0].imm_s16 < other.imm_values[0].imm_s16 + : imm_values[0].imm_u16 < other.imm_values[0].imm_u16; + case Type::U32: + return is_signed && other.is_signed ? imm_values[0].imm_s32 < other.imm_values[0].imm_s32 + : imm_values[0].imm_u32 < other.imm_values[0].imm_u32; + case Type::F32: + return imm_values[0].imm_f32 < other.imm_values[0].imm_f32; + case Type::U64: + return is_signed && other.is_signed ? imm_values[0].imm_s64 < other.imm_values[0].imm_s64 + : imm_values[0].imm_u64 < other.imm_values[0].imm_u64; + case Type::F64: + return imm_values[0].imm_f64 < other.imm_values[0].imm_f64; + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +bool ImmValue::operator>(const ImmValue& other) const noexcept { + ASSERT(type == other.type); + switch (type) { + case Type::U8: + return is_signed && other.is_signed ? imm_values[0].imm_s8 > other.imm_values[0].imm_s8 + : imm_values[0].imm_u8 > other.imm_values[0].imm_u8; + case Type::U16: + return is_signed && other.is_signed ? imm_values[0].imm_s16 > other.imm_values[0].imm_s16 + : imm_values[0].imm_u16 > other.imm_values[0].imm_u16; + case Type::U32: + return is_signed && other.is_signed ? imm_values[0].imm_s32 > other.imm_values[0].imm_s32 + : imm_values[0].imm_u32 > other.imm_values[0].imm_u32; + case Type::F32: + return imm_values[0].imm_f32 > other.imm_values[0].imm_f32; + case Type::U64: + return is_signed && other.is_signed ? imm_values[0].imm_s64 > other.imm_values[0].imm_s64 + : imm_values[0].imm_u64 > other.imm_values[0].imm_u64; + case Type::F64: + return imm_values[0].imm_f64 > other.imm_values[0].imm_f64; + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +bool ImmValue::operator<=(const ImmValue& other) const noexcept { + return !operator>(other); +} + +bool ImmValue::operator>=(const ImmValue& other) const noexcept { + return !operator<(other); +} + +ImmValue ImmValue::operator+(const ImmValue& other) const noexcept { + ASSERT(type == other.type); + switch (type) { + case Type::U8: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s8 + other.imm_values[0].imm_s8) + : ImmValue(imm_values[0].imm_u8 + other.imm_values[0].imm_u8); + case Type::U16: + return is_signed && other.is_signed + ? 
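// Note the pattern used throughout these operators: the signed interpretation
// is taken only when *both* operands are signed; any mixed case falls back to
// the unsigned representation. An illustrative consequence (values assumed):
//
//     ImmValue a{s32{-1}};
//     ImmValue b{u32{1}};
//     bool lt = a < b; // false: mixed signedness compares as u32, 0xFFFFFFFF > 1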
ImmValue(imm_values[0].imm_s16 + other.imm_values[0].imm_s16) + : ImmValue(imm_values[0].imm_u16 + other.imm_values[0].imm_u16); + case Type::U32: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 + other.imm_values[0].imm_s32) + : ImmValue(imm_values[0].imm_u32 + other.imm_values[0].imm_u32); + case Type::F32: + return ImmValue(imm_values[0].imm_f32 + other.imm_values[0].imm_f32); + case Type::U32x2: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 + other.imm_values[0].imm_s32, + imm_values[1].imm_s32 + other.imm_values[1].imm_s32) + : ImmValue(imm_values[0].imm_u32 + other.imm_values[0].imm_u32, + imm_values[1].imm_u32 + other.imm_values[1].imm_u32); + case Type::F32x2: + return ImmValue(imm_values[0].imm_f32 + other.imm_values[0].imm_f32, + imm_values[1].imm_f32 + other.imm_values[1].imm_f32); + case Type::U32x3: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 + other.imm_values[0].imm_s32, + imm_values[1].imm_s32 + other.imm_values[1].imm_s32, + imm_values[2].imm_s32 + other.imm_values[2].imm_s32) + : ImmValue(imm_values[0].imm_u32 + other.imm_values[0].imm_u32, + imm_values[1].imm_u32 + other.imm_values[1].imm_u32, + imm_values[2].imm_u32 + other.imm_values[2].imm_u32); + case Type::F32x3: + return ImmValue(imm_values[0].imm_f32 + other.imm_values[0].imm_f32, + imm_values[1].imm_f32 + other.imm_values[1].imm_f32, + imm_values[2].imm_f32 + other.imm_values[2].imm_f32); + case Type::U32x4: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 + other.imm_values[0].imm_s32, + imm_values[1].imm_s32 + other.imm_values[1].imm_s32, + imm_values[2].imm_s32 + other.imm_values[2].imm_s32, + imm_values[3].imm_s32 + other.imm_values[3].imm_s32) + : ImmValue(imm_values[0].imm_u32 + other.imm_values[0].imm_u32, + imm_values[1].imm_u32 + other.imm_values[1].imm_u32, + imm_values[2].imm_u32 + other.imm_values[2].imm_u32, + imm_values[3].imm_u32 + other.imm_values[3].imm_u32); + case Type::F32x4: + return ImmValue(imm_values[0].imm_f32 + other.imm_values[0].imm_f32, + imm_values[1].imm_f32 + other.imm_values[1].imm_f32, + imm_values[2].imm_f32 + other.imm_values[2].imm_f32, + imm_values[3].imm_f32 + other.imm_values[3].imm_f32); + case Type::U64: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s64 + other.imm_values[0].imm_s64) + : ImmValue(imm_values[0].imm_u64 + other.imm_values[0].imm_u64); + case Type::F64: + return ImmValue(imm_values[0].imm_f64 + other.imm_values[0].imm_f64); + case Type::F64x2: + return ImmValue(imm_values[0].imm_f64 + other.imm_values[0].imm_f64, + imm_values[1].imm_f64 + other.imm_values[1].imm_f64); + case Type::F64x3: + return ImmValue(imm_values[0].imm_f64 + other.imm_values[0].imm_f64, + imm_values[1].imm_f64 + other.imm_values[1].imm_f64, + imm_values[2].imm_f64 + other.imm_values[2].imm_f64); + case Type::F64x4: + return ImmValue(imm_values[0].imm_f64 + other.imm_values[0].imm_f64, + imm_values[1].imm_f64 + other.imm_values[1].imm_f64, + imm_values[2].imm_f64 + other.imm_values[2].imm_f64, + imm_values[3].imm_f64 + other.imm_values[3].imm_f64); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::operator-(const ImmValue& other) const noexcept { + ASSERT(type == other.type); + switch (type) { + case Type::U8: + return is_signed && other.is_signed + ? 
ImmValue(imm_values[0].imm_s8 - other.imm_values[0].imm_s8) + : ImmValue(imm_values[0].imm_u8 - other.imm_values[0].imm_u8); + case Type::U16: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s16 - other.imm_values[0].imm_s16) + : ImmValue(imm_values[0].imm_u16 - other.imm_values[0].imm_u16); + case Type::U32: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 - other.imm_values[0].imm_s32) + : ImmValue(imm_values[0].imm_u32 - other.imm_values[0].imm_u32); + case Type::F32: + return ImmValue(imm_values[0].imm_f32 - other.imm_values[0].imm_f32); + case Type::U32x2: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 - other.imm_values[0].imm_s32, + imm_values[1].imm_s32 - other.imm_values[1].imm_s32) + : ImmValue(imm_values[0].imm_u32 - other.imm_values[0].imm_u32, + imm_values[1].imm_u32 - other.imm_values[1].imm_u32); + case Type::F32x2: + return ImmValue(imm_values[0].imm_f32 - other.imm_values[0].imm_f32, + imm_values[1].imm_f32 - other.imm_values[1].imm_f32); + case Type::U32x3: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 - other.imm_values[0].imm_s32, + imm_values[1].imm_s32 - other.imm_values[1].imm_s32, + imm_values[2].imm_s32 - other.imm_values[2].imm_s32) + : ImmValue(imm_values[0].imm_u32 - other.imm_values[0].imm_u32, + imm_values[1].imm_u32 - other.imm_values[1].imm_u32, + imm_values[2].imm_u32 - other.imm_values[2].imm_u32); + case Type::F32x3: + return ImmValue(imm_values[0].imm_f32 - other.imm_values[0].imm_f32, + imm_values[1].imm_f32 - other.imm_values[1].imm_f32, + imm_values[2].imm_f32 - other.imm_values[2].imm_f32); + case Type::U32x4: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 - other.imm_values[0].imm_s32, + imm_values[1].imm_s32 - other.imm_values[1].imm_s32, + imm_values[2].imm_s32 - other.imm_values[2].imm_s32, + imm_values[3].imm_s32 - other.imm_values[3].imm_s32) + : ImmValue(imm_values[0].imm_u32 - other.imm_values[0].imm_u32, + imm_values[1].imm_u32 - other.imm_values[1].imm_u32, + imm_values[2].imm_u32 - other.imm_values[2].imm_u32, + imm_values[3].imm_u32 - other.imm_values[3].imm_u32); + case Type::F32x4: + return ImmValue(imm_values[0].imm_f32 - other.imm_values[0].imm_f32, + imm_values[1].imm_f32 - other.imm_values[1].imm_f32, + imm_values[2].imm_f32 - other.imm_values[2].imm_f32, + imm_values[3].imm_f32 - other.imm_values[3].imm_f32); + case Type::U64: + return is_signed && other.is_signed + ? 
ImmValue(imm_values[0].imm_s64 - other.imm_values[0].imm_s64) + : ImmValue(imm_values[0].imm_u64 - other.imm_values[0].imm_u64); + case Type::F64: + return ImmValue(imm_values[0].imm_f64 - other.imm_values[0].imm_f64); + case Type::F64x2: + return ImmValue(imm_values[0].imm_f64 - other.imm_values[0].imm_f64, + imm_values[1].imm_f64 - other.imm_values[1].imm_f64); + case Type::F64x3: + return ImmValue(imm_values[0].imm_f64 - other.imm_values[0].imm_f64, + imm_values[1].imm_f64 - other.imm_values[1].imm_f64, + imm_values[2].imm_f64 - other.imm_values[2].imm_f64); + case Type::F64x4: + return ImmValue(imm_values[0].imm_f64 - other.imm_values[0].imm_f64, + imm_values[1].imm_f64 - other.imm_values[1].imm_f64, + imm_values[2].imm_f64 - other.imm_values[2].imm_f64, + imm_values[3].imm_f64 - other.imm_values[3].imm_f64); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::operator*(const ImmValue& other) const noexcept { + ASSERT(BaseType() == other.BaseType()); + const ImmValue* vector; + const ImmValue* scalar; + if (Dimensions() == 1) { + scalar = this; + vector = &other; + } else if (other.Dimensions() == 1) { + scalar = &other; + vector = this; + } else { + UNREACHABLE_MSG("Unspecified behavior for vector * vector multiplication"); + } + switch (vector->type) { + case Type::U8: + return is_signed && scalar->is_signed + ? ImmValue(scalar->imm_values[0].imm_s8 * vector->imm_values[0].imm_s8) + : ImmValue(scalar->imm_values[0].imm_u8 * vector->imm_values[0].imm_u8); + case Type::U16: + return is_signed && scalar->is_signed + ? ImmValue(scalar->imm_values[0].imm_s16 * vector->imm_values[0].imm_s16) + : ImmValue(scalar->imm_values[0].imm_u16 * vector->imm_values[0].imm_u16); + case Type::U32: + return is_signed && scalar->is_signed + ? ImmValue(scalar->imm_values[0].imm_s32 * vector->imm_values[0].imm_s32) + : ImmValue(scalar->imm_values[0].imm_u32 * vector->imm_values[0].imm_u32); + case Type::F32: + return ImmValue(scalar->imm_values[0].imm_f32 * vector->imm_values[0].imm_f32); + case Type::U32x2: + return is_signed && scalar->is_signed + ? ImmValue(scalar->imm_values[0].imm_s32 * vector->imm_values[0].imm_s32, + scalar->imm_values[0].imm_s32 * vector->imm_values[1].imm_s32) + : ImmValue(scalar->imm_values[0].imm_u32 * vector->imm_values[0].imm_u32, + scalar->imm_values[0].imm_u32 * vector->imm_values[1].imm_u32); + case Type::F32x2: + return ImmValue(scalar->imm_values[0].imm_f32 * vector->imm_values[0].imm_f32, + scalar->imm_values[0].imm_f32 * vector->imm_values[1].imm_f32); + case Type::U32x3: + return is_signed && scalar->is_signed + ? ImmValue(scalar->imm_values[0].imm_s32 * vector->imm_values[0].imm_s32, + scalar->imm_values[0].imm_s32 * vector->imm_values[1].imm_s32, + scalar->imm_values[0].imm_s32 * vector->imm_values[2].imm_s32) + : ImmValue(scalar->imm_values[0].imm_u32 * vector->imm_values[0].imm_u32, + scalar->imm_values[0].imm_u32 * vector->imm_values[1].imm_u32, + scalar->imm_values[0].imm_u32 * vector->imm_values[2].imm_u32); + case Type::F32x3: + return ImmValue(scalar->imm_values[0].imm_f32 * vector->imm_values[0].imm_f32, + scalar->imm_values[0].imm_f32 * vector->imm_values[1].imm_f32, + scalar->imm_values[0].imm_f32 * vector->imm_values[2].imm_f32); + case Type::U32x4: + return is_signed && scalar->is_signed + ? 
ImmValue(scalar->imm_values[0].imm_s32 * vector->imm_values[0].imm_s32, + scalar->imm_values[0].imm_s32 * vector->imm_values[1].imm_s32, + scalar->imm_values[0].imm_s32 * vector->imm_values[2].imm_s32, + scalar->imm_values[0].imm_s32 * vector->imm_values[3].imm_s32) + : ImmValue(scalar->imm_values[0].imm_u32 * vector->imm_values[0].imm_u32, + scalar->imm_values[0].imm_u32 * vector->imm_values[1].imm_u32, + scalar->imm_values[0].imm_u32 * vector->imm_values[2].imm_u32, + scalar->imm_values[0].imm_u32 * vector->imm_values[3].imm_u32); + case Type::F32x4: + return ImmValue(scalar->imm_values[0].imm_f32 * vector->imm_values[0].imm_f32, + scalar->imm_values[0].imm_f32 * vector->imm_values[1].imm_f32, + scalar->imm_values[0].imm_f32 * vector->imm_values[2].imm_f32, + scalar->imm_values[0].imm_f32 * vector->imm_values[3].imm_f32); + case Type::U64: + return is_signed && scalar->is_signed + ? ImmValue(scalar->imm_values[0].imm_s64 * vector->imm_values[0].imm_s64) + : ImmValue(scalar->imm_values[0].imm_u64 * vector->imm_values[0].imm_u64); + case Type::F64: + return ImmValue(scalar->imm_values[0].imm_f64 * vector->imm_values[0].imm_f64); + case Type::F64x2: + return ImmValue(scalar->imm_values[0].imm_f64 * vector->imm_values[0].imm_f64, + scalar->imm_values[0].imm_f64 * vector->imm_values[1].imm_f64); + case Type::F64x3: + return ImmValue(scalar->imm_values[0].imm_f64 * vector->imm_values[0].imm_f64, + scalar->imm_values[0].imm_f64 * vector->imm_values[1].imm_f64, + scalar->imm_values[0].imm_f64 * vector->imm_values[2].imm_f64); + case Type::F64x4: + return ImmValue(scalar->imm_values[0].imm_f64 * vector->imm_values[0].imm_f64, + scalar->imm_values[0].imm_f64 * vector->imm_values[1].imm_f64, + scalar->imm_values[0].imm_f64 * vector->imm_values[2].imm_f64, + scalar->imm_values[0].imm_f64 * vector->imm_values[3].imm_f64); + default: + UNREACHABLE_MSG("Invalid type {}", vector->type); + } +} + +ImmValue ImmValue::operator/(const ImmValue& other) const { + ASSERT(BaseType() == other.BaseType() && other.Dimensions() == 1); + switch (type) { + case Type::U8: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s8 / other.imm_values[0].imm_s8) + : ImmValue(imm_values[0].imm_u8 / other.imm_values[0].imm_u8); + case Type::U16: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s16 / other.imm_values[0].imm_s16) + : ImmValue(imm_values[0].imm_u16 / other.imm_values[0].imm_u16); + case Type::U32: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 / other.imm_values[0].imm_s32) + : ImmValue(imm_values[0].imm_u32 / other.imm_values[0].imm_u32); + case Type::F32: + return ImmValue(imm_values[0].imm_f32 / other.imm_values[0].imm_f32); + case Type::U32x2: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 / other.imm_values[0].imm_s32, + imm_values[1].imm_s32 / other.imm_values[0].imm_s32) + : ImmValue(imm_values[0].imm_u32 / other.imm_values[0].imm_u32, + imm_values[1].imm_u32 / other.imm_values[0].imm_u32); + case Type::F32x2: + return ImmValue(imm_values[0].imm_f32 / other.imm_values[0].imm_f32, + imm_values[1].imm_f32 / other.imm_values[0].imm_f32); + case Type::U32x3: + return is_signed && other.is_signed + ? 
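// operator* above only defines scalar * vector (or scalar * scalar), and
// operator/ divides every component by a scalar. A sketch of the intended use
// (values are illustrative):
//
//     ImmValue vec{1.0f, 2.0f, 3.0f}; // F32x3
//     ImmValue two{2.0f};             // F32 scalar
//     ImmValue scaled = vec * two;    // (2.0f, 4.0f, 6.0f)
//     ImmValue halved = vec / two;    // (0.5f, 1.0f, 1.5f)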
ImmValue(imm_values[0].imm_s32 / other.imm_values[0].imm_s32, + imm_values[1].imm_s32 / other.imm_values[0].imm_s32, + imm_values[2].imm_s32 / other.imm_values[0].imm_s32) + : ImmValue(imm_values[0].imm_u32 / other.imm_values[0].imm_u32, + imm_values[1].imm_u32 / other.imm_values[0].imm_u32, + imm_values[2].imm_u32 / other.imm_values[0].imm_u32); + case Type::F32x3: + return ImmValue(imm_values[0].imm_f32 / other.imm_values[0].imm_f32, + imm_values[1].imm_f32 / other.imm_values[0].imm_f32, + imm_values[2].imm_f32 / other.imm_values[0].imm_f32); + case Type::U32x4: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 / other.imm_values[0].imm_s32, + imm_values[1].imm_s32 / other.imm_values[0].imm_s32, + imm_values[2].imm_s32 / other.imm_values[0].imm_s32, + imm_values[3].imm_s32 / other.imm_values[0].imm_s32) + : ImmValue(imm_values[0].imm_u32 / other.imm_values[0].imm_u32, + imm_values[1].imm_u32 / other.imm_values[0].imm_u32, + imm_values[2].imm_u32 / other.imm_values[0].imm_u32, + imm_values[3].imm_u32 / other.imm_values[0].imm_u32); + case Type::F32x4: + return ImmValue(imm_values[0].imm_f32 / other.imm_values[0].imm_f32, + imm_values[1].imm_f32 / other.imm_values[0].imm_f32, + imm_values[2].imm_f32 / other.imm_values[0].imm_f32, + imm_values[3].imm_f32 / other.imm_values[0].imm_f32); + case Type::U64: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s64 / other.imm_values[0].imm_s64) + : ImmValue(imm_values[0].imm_u64 / other.imm_values[0].imm_u64); + case Type::F64: + return ImmValue(imm_values[0].imm_f64 / other.imm_values[0].imm_f64); + case Type::F64x2: + return ImmValue(imm_values[0].imm_f64 / other.imm_values[0].imm_f64, + imm_values[1].imm_f64 / other.imm_values[0].imm_f64); + case Type::F64x3: + return ImmValue(imm_values[0].imm_f64 / other.imm_values[0].imm_f64, + imm_values[1].imm_f64 / other.imm_values[0].imm_f64, + imm_values[2].imm_f64 / other.imm_values[0].imm_f64); + case Type::F64x4: + return ImmValue(imm_values[0].imm_f64 / other.imm_values[0].imm_f64, + imm_values[1].imm_f64 / other.imm_values[0].imm_f64, + imm_values[2].imm_f64 / other.imm_values[0].imm_f64, + imm_values[3].imm_f64 / other.imm_values[0].imm_f64); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::operator%(const ImmValue& other) const noexcept { + ASSERT(type == other.type); + switch (type) { + case Type::U8: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s8 % other.imm_values[0].imm_s8) + : ImmValue(imm_values[0].imm_u8 % other.imm_values[0].imm_u8); + case Type::U16: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s16 % other.imm_values[0].imm_s16) + : ImmValue(imm_values[0].imm_u16 % other.imm_values[0].imm_u16); + case Type::U32: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 % other.imm_values[0].imm_s32) + : ImmValue(imm_values[0].imm_u32 % other.imm_values[0].imm_u32); + case Type::U64: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s64 % other.imm_values[0].imm_s64) + : ImmValue(imm_values[0].imm_u64 % other.imm_values[0].imm_u64); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::operator&(const ImmValue& other) const noexcept { + ASSERT(type == other.type); + switch (type) { + case Type::U1: + return ImmValue(imm_values[0].imm_u1 & other.imm_values[0].imm_u1); + case Type::U8: + return is_signed && other.is_signed + ? 
ImmValue(imm_values[0].imm_s8 & other.imm_values[0].imm_s8) + : ImmValue(imm_values[0].imm_u8 & other.imm_values[0].imm_u8); + case Type::U16: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s16 & other.imm_values[0].imm_s16) + : ImmValue(imm_values[0].imm_u16 & other.imm_values[0].imm_u16); + case Type::U32: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 & other.imm_values[0].imm_s32) + : ImmValue(imm_values[0].imm_u32 & other.imm_values[0].imm_u32); + case Type::U64: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s64 & other.imm_values[0].imm_s64) + : ImmValue(imm_values[0].imm_u64 & other.imm_values[0].imm_u64); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::operator|(const ImmValue& other) const noexcept { + ASSERT(type == other.type); + switch (type) { + case Type::U1: + return ImmValue(imm_values[0].imm_u1 | other.imm_values[0].imm_u1); + case Type::U8: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s8 | other.imm_values[0].imm_s8) + : ImmValue(imm_values[0].imm_u8 | other.imm_values[0].imm_u8); + case Type::U16: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s16 | other.imm_values[0].imm_s16) + : ImmValue(imm_values[0].imm_u16 | other.imm_values[0].imm_u16); + case Type::U32: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 | other.imm_values[0].imm_s32) + : ImmValue(imm_values[0].imm_u32 | other.imm_values[0].imm_u32); + case Type::U64: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s64 | other.imm_values[0].imm_s64) + : ImmValue(imm_values[0].imm_u64 | other.imm_values[0].imm_u64); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::operator^(const ImmValue& other) const noexcept { + ASSERT(type == other.type); + switch (type) { + case Type::U1: + return ImmValue(imm_values[0].imm_u1 ^ other.imm_values[0].imm_u1); + case Type::U8: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s8 ^ other.imm_values[0].imm_s8) + : ImmValue(imm_values[0].imm_u8 ^ other.imm_values[0].imm_u8); + case Type::U16: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s16 ^ other.imm_values[0].imm_s16) + : ImmValue(imm_values[0].imm_u16 ^ other.imm_values[0].imm_u16); + case Type::U32: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s32 ^ other.imm_values[0].imm_s32) + : ImmValue(imm_values[0].imm_u32 ^ other.imm_values[0].imm_u32); + case Type::U64: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s64 ^ other.imm_values[0].imm_s64) + : ImmValue(imm_values[0].imm_u64 ^ other.imm_values[0].imm_u64); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::operator<<(const ImmValue& other) const noexcept { + ASSERT(type == other.type); + switch (type) { + case Type::U1: + return ImmValue(imm_values[0].imm_u1 << other.imm_values[0].imm_u1); + case Type::U8: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s8 << other.imm_values[0].imm_s8) + : ImmValue(imm_values[0].imm_u8 << other.imm_values[0].imm_u8); + case Type::U16: + return is_signed && other.is_signed + ? ImmValue(imm_values[0].imm_s16 << other.imm_values[0].imm_s16) + : ImmValue(imm_values[0].imm_u16 << other.imm_values[0].imm_u16); + case Type::U32: + return is_signed && other.is_signed + ? 
ImmValue(imm_values[0].imm_s32 << other.imm_values[0].imm_s32)
+                   : ImmValue(imm_values[0].imm_u32 << other.imm_values[0].imm_u32);
+    case Type::U64:
+        return is_signed && other.is_signed
+                   ? ImmValue(imm_values[0].imm_s64 << other.imm_values[0].imm_s64)
+                   : ImmValue(imm_values[0].imm_u64 << other.imm_values[0].imm_u64);
+    default:
+        UNREACHABLE_MSG("Invalid type {}", type);
+    }
+}
+
+ImmValue ImmValue::operator>>(const ImmValue& other) const noexcept {
+    ASSERT(type == other.type);
+    switch (type) {
+    case Type::U1:
+        return ImmValue(imm_values[0].imm_u1 >> other.imm_values[0].imm_u1);
+    case Type::U8:
+        return is_signed && other.is_signed
+                   ? ImmValue(imm_values[0].imm_s8 >> other.imm_values[0].imm_s8)
+                   : ImmValue(imm_values[0].imm_u8 >> other.imm_values[0].imm_u8);
+    case Type::U16:
+        return is_signed && other.is_signed
+                   ? ImmValue(imm_values[0].imm_s16 >> other.imm_values[0].imm_s16)
+                   : ImmValue(imm_values[0].imm_u16 >> other.imm_values[0].imm_u16);
+    case Type::U32:
+        return is_signed && other.is_signed
+                   ? ImmValue(imm_values[0].imm_s32 >> other.imm_values[0].imm_s32)
+                   : ImmValue(imm_values[0].imm_u32 >> other.imm_values[0].imm_u32);
+    case Type::U64:
+        return is_signed && other.is_signed
+                   ? ImmValue(imm_values[0].imm_s64 >> other.imm_values[0].imm_s64)
+                   : ImmValue(imm_values[0].imm_u64 >> other.imm_values[0].imm_u64);
+    default:
+        UNREACHABLE_MSG("Invalid type {}", type);
+    }
+}
+
+ImmValue ImmValue::operator~() const noexcept {
+    switch (type) {
+    case Type::U1:
+        return ImmValue(!imm_values[0].imm_u1);
+    case Type::U8:
+        return is_signed ? ImmValue(static_cast<s8>(~imm_values[0].imm_s8))
+                         : ImmValue(static_cast<u8>(~imm_values[0].imm_u8));
+    case Type::U16:
+        return is_signed ? ImmValue(static_cast<s16>(~imm_values[0].imm_s16))
+                         : ImmValue(static_cast<u16>(~imm_values[0].imm_u16));
+    case Type::U32:
+        return is_signed ? ImmValue(~imm_values[0].imm_s32) : ImmValue(~imm_values[0].imm_u32);
+    case Type::U64:
+        return is_signed ? ImmValue(~imm_values[0].imm_s64) : ImmValue(~imm_values[0].imm_u64);
+    default:
+        UNREACHABLE_MSG("Invalid type {}", type);
+    }
+}
+
+ImmValue ImmValue::operator++(int) noexcept {
+    switch (type) {
+    case Type::U8:
+        return is_signed ? ImmValue(imm_values[0].imm_s8++) : ImmValue(imm_values[0].imm_u8++);
+    case Type::U16:
+        return is_signed ? ImmValue(imm_values[0].imm_s16++) : ImmValue(imm_values[0].imm_u16++);
+    case Type::U32:
+        return is_signed ? ImmValue(imm_values[0].imm_s32++) : ImmValue(imm_values[0].imm_u32++);
+    case Type::U64:
+        return is_signed ? ImmValue(imm_values[0].imm_s64++) : ImmValue(imm_values[0].imm_u64++);
+    case Type::F32:
+        return ImmValue(imm_values[0].imm_f32++);
+    case Type::F64:
+        return ImmValue(imm_values[0].imm_f64++);
+    default:
+        UNREACHABLE_MSG("Invalid type {}", type);
+    }
+}
+
+ImmValue ImmValue::operator--(int) noexcept {
+    switch (type) {
+    case Type::U8:
+        return is_signed ? ImmValue(imm_values[0].imm_s8--) : ImmValue(imm_values[0].imm_u8--);
+    case Type::U16:
+        return is_signed ? ImmValue(imm_values[0].imm_s16--) : ImmValue(imm_values[0].imm_u16--);
+    case Type::U32:
+        return is_signed ? ImmValue(imm_values[0].imm_s32--) : ImmValue(imm_values[0].imm_u32--);
+    case Type::U64:
+        return is_signed ?
ImmValue(imm_values[0].imm_s64--) : ImmValue(imm_values[0].imm_u64--); + case Type::F32: + return ImmValue(imm_values[0].imm_f32--); + case Type::F64: + return ImmValue(imm_values[0].imm_f64--); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue& ImmValue::operator++() noexcept { + switch (type) { + case Type::U8: + if (is_signed) { + imm_values[0].imm_s8++; + } else { + imm_values[0].imm_u8++; + } + break; + case Type::U16: + if (is_signed) { + imm_values[0].imm_s16++; + } else { + imm_values[0].imm_u16++; + } + break; + case Type::U32: + if (is_signed) { + imm_values[0].imm_s32++; + } else { + imm_values[0].imm_u32++; + } + break; + case Type::U64: + if (is_signed) { + imm_values[0].imm_s64++; + } else { + imm_values[0].imm_u64++; + } + break; + case Type::F32: + imm_values[0].imm_f32++; + break; + case Type::F64: + imm_values[0].imm_f64++; + break; + default: + UNREACHABLE_MSG("Invalid type {}", type); + } + return *this; +} + +ImmValue& ImmValue::operator--() noexcept { + switch (type) { + case Type::U8: + if (is_signed) { + imm_values[0].imm_s8--; + } else { + imm_values[0].imm_u8--; + } + break; + case Type::U16: + if (is_signed) { + imm_values[0].imm_s16--; + } else { + imm_values[0].imm_u16--; + } + break; + case Type::U32: + if (is_signed) { + imm_values[0].imm_s32--; + } else { + imm_values[0].imm_u32--; + } + break; + case Type::U64: + if (is_signed) { + imm_values[0].imm_s64--; + } else { + imm_values[0].imm_u64--; + } + break; + case Type::F32: + imm_values[0].imm_f32--; + break; + case Type::F64: + imm_values[0].imm_f64--; + break; + default: + UNREACHABLE_MSG("Invalid type {}", type); + } + return *this; +} + +ImmValue ImmValue::operator-() const noexcept { + switch (type) { + case Type::U8: + return is_signed ? ImmValue(-imm_values[0].imm_s8) : ImmValue(-imm_values[0].imm_u8); + case Type::U16: + return is_signed ? ImmValue(-imm_values[0].imm_s16) : ImmValue(-imm_values[0].imm_u16); + case Type::U32: + return is_signed ? ImmValue(-imm_values[0].imm_s32) : ImmValue(-imm_values[0].imm_u32); + case Type::U32x2: + return is_signed ? ImmValue(-imm_values[0].imm_s32, -imm_values[1].imm_s32) + : ImmValue(-imm_values[0].imm_u32, -imm_values[1].imm_u32); + case Type::U32x3: + return is_signed ? ImmValue(-imm_values[0].imm_s32, -imm_values[1].imm_s32, + -imm_values[2].imm_s32) + : ImmValue(-imm_values[0].imm_u32, -imm_values[1].imm_u32, + -imm_values[2].imm_u32); + case Type::U32x4: + return is_signed ? ImmValue(-imm_values[0].imm_s32, -imm_values[1].imm_s32, + -imm_values[2].imm_s32, -imm_values[3].imm_s32) + : ImmValue(-imm_values[0].imm_u32, -imm_values[1].imm_u32, + -imm_values[2].imm_u32, -imm_values[3].imm_u32); + case Type::U64: + return is_signed ? 
ImmValue(-imm_values[0].imm_s64) : ImmValue(-imm_values[0].imm_u64);
+    case Type::F32:
+        return ImmValue(-imm_values[0].imm_f32);
+    case Type::F32x2:
+        return ImmValue(-imm_values[0].imm_f32, -imm_values[1].imm_f32);
+    case Type::F32x3:
+        return ImmValue(-imm_values[0].imm_f32, -imm_values[1].imm_f32, -imm_values[2].imm_f32);
+    case Type::F32x4:
+        return ImmValue(-imm_values[0].imm_f32, -imm_values[1].imm_f32, -imm_values[2].imm_f32,
+                        -imm_values[3].imm_f32);
+    case Type::F64:
+        return ImmValue(-imm_values[0].imm_f64);
+    case Type::F64x2:
+        return ImmValue(-imm_values[0].imm_f64, -imm_values[1].imm_f64);
+    case Type::F64x3:
+        return ImmValue(-imm_values[0].imm_f64, -imm_values[1].imm_f64, -imm_values[2].imm_f64);
+    case Type::F64x4:
+        return ImmValue(-imm_values[0].imm_f64, -imm_values[1].imm_f64, -imm_values[2].imm_f64,
+                        -imm_values[3].imm_f64);
+    default:
+        UNREACHABLE_MSG("Invalid type {}", type);
+    }
+}
+
+ImmValue ImmValue::operator+() const noexcept {
+    return *this;
+}
+
+// Note: the compound assignment operators below are implemented in terms of the
+// binary operators above, computing a full temporary and copying it back.
+
+ImmValue& ImmValue::operator+=(const ImmValue& other) noexcept {
+    ImmValue result = *this + other;
+    *this = result;
+    return *this;
+}
+
+ImmValue& ImmValue::operator-=(const ImmValue& other) noexcept {
+    ImmValue result = *this - other;
+    *this = result;
+    return *this;
+}
+
+ImmValue& ImmValue::operator*=(const ImmValue& other) noexcept {
+    ImmValue result = *this * other;
+    *this = result;
+    return *this;
+}
+
+ImmValue& ImmValue::operator/=(const ImmValue& other) {
+    ImmValue result = *this / other;
+    *this = result;
+    return *this;
+}
+
+ImmValue& ImmValue::operator%=(const ImmValue& other) noexcept {
+    ImmValue result = *this % other;
+    *this = result;
+    return *this;
+}
+
+ImmValue& ImmValue::operator&=(const ImmValue& other) noexcept {
+    ImmValue result = *this & other;
+    *this = result;
+    return *this;
+}
+
+ImmValue& ImmValue::operator|=(const ImmValue& other) noexcept {
+    ImmValue result = *this | other;
+    *this = result;
+    return *this;
+}
+
+ImmValue& ImmValue::operator^=(const ImmValue& other) noexcept {
+    ImmValue result = *this ^ other;
+    *this = result;
+    return *this;
+}
+
+ImmValue& ImmValue::operator<<=(const ImmValue& other) noexcept {
+    ImmValue result = *this << other;
+    *this = result;
+    return *this;
+}
+
+ImmValue& ImmValue::operator>>=(const ImmValue& other) noexcept {
+    ImmValue result = *this >> other;
+    *this = result;
+    return *this;
+}
+
+} // namespace Shader::IR
+
+namespace std {
+
+std::size_t hash<Shader::IR::ImmValue>::operator()(const Shader::IR::ImmValue& value) const {
+    using namespace Shader::IR;
+
+    u64 h = HashCombine(static_cast<u64>(value.Type()), 0ULL);
+
+    switch (value.Type()) {
+    case Type::U1:
+        return HashCombine(static_cast<u64>(value.imm_values[0].imm_u1), h);
+    case Type::U8:
+        return HashCombine(static_cast<u64>(value.imm_values[0].imm_u8), h);
+    case Type::U16:
+        return HashCombine(static_cast<u64>(value.imm_values[0].imm_u16), h);
+    case Type::U32:
+    case Type::F32:
+        return HashCombine(static_cast<u64>(value.imm_values[0].imm_u32), h);
+    case Type::U64:
+    case Type::F64:
+        return HashCombine(static_cast<u64>(value.imm_values[0].imm_u64), h);
+    case Type::U32x2:
+    case Type::F32x2:
+        h = HashCombine(static_cast<u64>(value.imm_values[0].imm_u32), h);
+        return HashCombine(static_cast<u64>(value.imm_values[1].imm_u32), h);
+    case Type::F64x2:
+        h = HashCombine(static_cast<u64>(value.imm_values[0].imm_f64), h);
+        return HashCombine(static_cast<u64>(value.imm_values[1].imm_f64), h);
+    case Type::U32x3:
+    case Type::F32x3:
+        h = HashCombine(static_cast<u64>(value.imm_values[0].imm_u32), h);
+        h = HashCombine(static_cast<u64>(value.imm_values[1].imm_u32), h);
+        return HashCombine(static_cast<u64>(value.imm_values[2].imm_u32), h);
+    case Type::F64x3:
+        h = HashCombine(static_cast<u64>(value.imm_values[0].imm_f64), h);
+        h = HashCombine(static_cast<u64>(value.imm_values[1].imm_f64), h);
+        return HashCombine(static_cast<u64>(value.imm_values[2].imm_f64), h);
+    case Type::U32x4:
+    case Type::F32x4:
+        h = HashCombine(static_cast<u64>(value.imm_values[0].imm_u32), h);
+        h = HashCombine(static_cast<u64>(value.imm_values[1].imm_u32), h);
+        h = HashCombine(static_cast<u64>(value.imm_values[2].imm_u32), h);
+        return HashCombine(static_cast<u64>(value.imm_values[3].imm_u32), h);
+    case Type::F64x4:
+        h = HashCombine(static_cast<u64>(value.imm_values[0].imm_f64), h);
+        h = HashCombine(static_cast<u64>(value.imm_values[1].imm_f64), h);
+        h = HashCombine(static_cast<u64>(value.imm_values[2].imm_f64), h);
+        return HashCombine(static_cast<u64>(value.imm_values[3].imm_f64), h);
+    default:
+        UNREACHABLE_MSG("Invalid type {}", value.Type());
+    }
+}
+
+} // namespace std
diff --git a/src/shader_recompiler/ir/compute_value/imm_value.h b/src/shader_recompiler/ir/compute_value/imm_value.h
new file mode 100644
index 000000000..1a304a4af
--- /dev/null
+++ b/src/shader_recompiler/ir/compute_value/imm_value.h
@@ -0,0 +1,345 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <tuple>
+#include <type_traits>
+#include "common/assert.h"
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/ir/type.h"
+#include "shader_recompiler/ir/value.h"
+
+namespace Shader::IR {
+
+class ImmValue {
+public:
+    ImmValue() noexcept = default;
+    explicit ImmValue(const IR::Value& value) noexcept;
+    explicit ImmValue(bool value) noexcept;
+    explicit ImmValue(u8 value) noexcept;
+    explicit ImmValue(s8 value) noexcept;
+    explicit ImmValue(u16 value) noexcept;
+    explicit ImmValue(s16 value) noexcept;
+    explicit ImmValue(u32 value) noexcept;
+    explicit ImmValue(s32 value) noexcept;
+    explicit ImmValue(f32 value) noexcept;
+    explicit ImmValue(u64 value) noexcept;
+    explicit ImmValue(s64 value) noexcept;
+    explicit ImmValue(f64 value) noexcept;
+    ImmValue(u32 value1, u32 value2) noexcept;
+    ImmValue(u32 value1, u32 value2, u32 value3) noexcept;
+    ImmValue(u32 value1, u32 value2, u32 value3, u32 value4) noexcept;
+    ImmValue(s32 value1, s32 value2) noexcept;
+    ImmValue(s32 value1, s32 value2, s32 value3) noexcept;
+    ImmValue(s32 value1, s32 value2, s32 value3, s32 value4) noexcept;
+    ImmValue(f32 value1, f32 value2) noexcept;
+    ImmValue(f32 value1, f32 value2, f32 value3) noexcept;
+    ImmValue(f32 value1, f32 value2, f32 value3, f32 value4) noexcept;
+    ImmValue(f64 value1, f64 value2) noexcept;
+    ImmValue(f64 value1, f64 value2, f64 value3) noexcept;
+    ImmValue(f64 value1, f64 value2, f64 value3, f64 value4) noexcept;
+
+    [[nodiscard]] bool IsEmpty() const noexcept;
+    [[nodiscard]] IR::Type Type() const noexcept;
+    [[nodiscard]] IR::Type BaseType() const noexcept;
+    [[nodiscard]] u32 Dimensions() const noexcept;
+
+    [[nodiscard]] bool IsSigned() const noexcept;
+    void SetSigned(bool signed_) noexcept;
+    void SameSignAs(const ImmValue& other) noexcept;
+
+    [[nodiscard]] bool U1() const;
+    [[nodiscard]] u8 U8() const;
+    [[nodiscard]] s8 S8() const;
+    [[nodiscard]] u16 U16() const;
+    [[nodiscard]] s16 S16() const;
+    [[nodiscard]] u32 U32() const;
+    [[nodiscard]] s32 S32() const;
+    [[nodiscard]] f32 F32() const;
+    [[nodiscard]] u64 U64() const;
+    [[nodiscard]] s64 S64() const;
+    [[nodiscard]] f64 F64() const;
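// The scalar accessors above assert both the stored type and its signedness,
// so U32() on a signed value trips the assert. A hypothetical round-trip:
//
//     ImmValue v{42u};  // Type::U32, unsigned
//     u32 x = v.U32();  // OK
//     s32 y = v.S32();  // would assert: same storage, wrong signedness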
+
+    [[nodiscard]] std::tuple<u32, u32> U32x2() const;
+    [[nodiscard]] std::tuple<u32, u32, u32> U32x3() const;
+    [[nodiscard]] std::tuple<u32, u32, u32, u32> U32x4() const;
+    [[nodiscard]] std::tuple<s32, s32> S32x2() const;
+    [[nodiscard]] std::tuple<s32, s32, s32> S32x3() const;
+    [[nodiscard]] std::tuple<s32, s32, s32, s32> S32x4() const;
+    [[nodiscard]] std::tuple<f32, f32> F32x2() const;
+    [[nodiscard]] std::tuple<f32, f32, f32> F32x3() const;
+    [[nodiscard]] std::tuple<f32, f32, f32, f32> F32x4() const;
+    [[nodiscard]] std::tuple<f64, f64> F64x2() const;
+    [[nodiscard]] std::tuple<f64, f64, f64> F64x3() const;
+    [[nodiscard]] std::tuple<f64, f64, f64, f64> F64x4() const;
+
+    [[nodiscard]] bool operator==(const ImmValue& other) const noexcept;
+    [[nodiscard]] bool operator!=(const ImmValue& other) const noexcept;
+    [[nodiscard]] bool operator<(const ImmValue& other) const noexcept;
+    [[nodiscard]] bool operator>(const ImmValue& other) const noexcept;
+    [[nodiscard]] bool operator<=(const ImmValue& other) const noexcept;
+    [[nodiscard]] bool operator>=(const ImmValue& other) const noexcept;
+
+    [[nodiscard]] ImmValue operator+(const ImmValue& other) const noexcept;
+    [[nodiscard]] ImmValue operator-(const ImmValue& other) const noexcept;
+    [[nodiscard]] ImmValue operator*(const ImmValue& other) const noexcept;
+    [[nodiscard]] ImmValue operator/(const ImmValue& other) const;
+    [[nodiscard]] ImmValue operator%(const ImmValue& other) const noexcept;
+    [[nodiscard]] ImmValue operator&(const ImmValue& other) const noexcept;
+    [[nodiscard]] ImmValue operator|(const ImmValue& other) const noexcept;
+    [[nodiscard]] ImmValue operator^(const ImmValue& other) const noexcept;
+    [[nodiscard]] ImmValue operator<<(const ImmValue& other) const noexcept;
+    [[nodiscard]] ImmValue operator>>(const ImmValue& other) const noexcept;
+    [[nodiscard]] ImmValue operator~() const noexcept;
+
+    [[nodiscard]] ImmValue operator++(int) noexcept;
+    [[nodiscard]] ImmValue operator--(int) noexcept;
+
+    ImmValue& operator++() noexcept;
+    ImmValue& operator--() noexcept;
+
+    [[nodiscard]] ImmValue operator-() const noexcept;
+    [[nodiscard]] ImmValue operator+() const noexcept;
+
+    ImmValue& operator+=(const ImmValue& other) noexcept;
+    ImmValue& operator-=(const ImmValue& other) noexcept;
+    ImmValue& operator*=(const ImmValue& other) noexcept;
+    ImmValue& operator/=(const ImmValue& other);
+    ImmValue& operator%=(const ImmValue& other) noexcept;
+    ImmValue& operator&=(const ImmValue& other) noexcept;
+    ImmValue& operator|=(const ImmValue& other) noexcept;
+    ImmValue& operator^=(const ImmValue& other) noexcept;
+    ImmValue& operator<<=(const ImmValue& other) noexcept;
+    ImmValue& operator>>=(const ImmValue& other) noexcept;
+
+private:
+    union Value {
+        bool imm_u1;
+        u8 imm_u8;
+        s8 imm_s8;
+        u16 imm_u16;
+        s16 imm_s16;
+        u32 imm_u32;
+        s32 imm_s32;
+        f32 imm_f32;
+        u64 imm_u64;
+        s64 imm_s64;
+        f64 imm_f64;
+    };
+
+    IR::Type type{};
+    bool is_signed{};
+    std::array<Value, 4> imm_values;
+
+    friend struct std::hash<ImmValue>;
+};
+static_assert(std::is_trivially_copyable_v<ImmValue>);
+
+template <IR::Type type_, bool is_signed_>
+class TypedImmValue : public ImmValue {
+public:
+    inline static constexpr IR::Type static_type = type_;
+    inline static constexpr bool static_is_signed = is_signed_;
+
+    TypedImmValue() = default;
+
+    template <IR::Type other_type, bool other_signed>
+        requires((other_type & type_) != IR::Type::Void && other_signed == is_signed_)
+    explicit(false) TypedImmValue(const TypedImmValue<other_type, other_signed>& other)
+        : ImmValue(other) {}
+
+    explicit TypedImmValue(const ImmValue& value) : ImmValue(value) {
+        if ((value.Type() & type_) == IR::Type::Void || value.IsSigned() != is_signed_) {
"signed" : "unsigned", type_, value.Type(), + value.IsSigned() ? "signed" : "unsigned"); + } + } +}; + +using ImmU1 = TypedImmValue; +using ImmU8 = TypedImmValue; +using ImmS8 = TypedImmValue; +using ImmU16 = TypedImmValue; +using ImmS16 = TypedImmValue; +using ImmU32 = TypedImmValue; +using ImmS32 = TypedImmValue; +using ImmF32 = TypedImmValue; +using ImmU64 = TypedImmValue; +using ImmS64 = TypedImmValue; +using ImmF64 = TypedImmValue; +using ImmS32F32 = TypedImmValue; +using ImmS64F64 = TypedImmValue; +using ImmU32U64 = TypedImmValue; +using ImmS32S64 = TypedImmValue; +using ImmU16U32U64 = TypedImmValue; +using ImmS16S32S64 = TypedImmValue; +using ImmF32F64 = TypedImmValue; +using ImmUAny = TypedImmValue; +using ImmSAny = TypedImmValue; +using ImmU32x2 = TypedImmValue; +using ImmU32x3 = TypedImmValue; +using ImmU32x4 = TypedImmValue; +using ImmS32x2 = TypedImmValue; +using ImmS32x3 = TypedImmValue; +using ImmS32x4 = TypedImmValue; +using ImmF32x2 = TypedImmValue; +using ImmF32x3 = TypedImmValue; +using ImmF32x4 = TypedImmValue; +using ImmF64x2 = TypedImmValue; +using ImmF64x3 = TypedImmValue; +using ImmF64x4 = TypedImmValue; +using ImmS32F32x2 = TypedImmValue; +using ImmS32F32x3 = TypedImmValue; +using ImmS32F32x4 = TypedImmValue; +using ImmF32F64x2 = TypedImmValue; +using ImmF32F64x3 = TypedImmValue; +using ImmF32F64x4 = TypedImmValue; +using ImmU32xAny = TypedImmValue; +using ImmS32xAny = TypedImmValue; +using ImmF32xAny = TypedImmValue; +using ImmF64xAny = TypedImmValue; +using ImmS32F32xAny = TypedImmValue; +using ImmF32F64xAny = TypedImmValue; + +inline bool ImmValue::IsEmpty() const noexcept { + return type == Type::Void; +} + +inline IR::Type ImmValue::Type() const noexcept { + return type; +} + +inline bool ImmValue::U1() const { + ASSERT(type == Type::U1 && !is_signed); + return imm_values[0].imm_u1; +} + +inline u8 ImmValue::U8() const { + ASSERT(type == Type::U8 && !is_signed); + return imm_values[0].imm_u8; +} + +inline s8 ImmValue::S8() const { + ASSERT(type == Type::U8 && is_signed); + return imm_values[0].imm_s8; +} + +inline u16 ImmValue::U16() const { + ASSERT(type == Type::U16 && !is_signed); + return imm_values[0].imm_u16; +} + +inline s16 ImmValue::S16() const { + ASSERT(type == Type::U16 && is_signed); + return imm_values[0].imm_s16; +} + +inline u32 ImmValue::U32() const { + ASSERT(type == Type::U32 && !is_signed); + return imm_values[0].imm_u32; +} + +inline s32 ImmValue::S32() const { + ASSERT(type == Type::U32 && is_signed); + return imm_values[0].imm_s32; +} + +inline f32 ImmValue::F32() const { + ASSERT(type == Type::F32 && is_signed); + return imm_values[0].imm_f32; +} + +inline u64 ImmValue::U64() const { + ASSERT(type == Type::U64 && !is_signed); + return imm_values[0].imm_u64; +} + +inline s64 ImmValue::S64() const { + ASSERT(type == Type::U64 && is_signed); + return imm_values[0].imm_s64; +} + +inline f64 ImmValue::F64() const { + ASSERT(type == Type::F64 && is_signed); + return imm_values[0].imm_f64; +} + +inline std::tuple ImmValue::U32x2() const { + ASSERT(type == Type::U32x2 && !is_signed); + return {imm_values[0].imm_u32, imm_values[1].imm_u32}; +} + +inline std::tuple ImmValue::U32x3() const { + ASSERT(type == Type::U32x3 && !is_signed); + return {imm_values[0].imm_u32, imm_values[1].imm_u32, imm_values[2].imm_u32}; +} + +inline std::tuple ImmValue::U32x4() const { + ASSERT(type == Type::U32x4 && !is_signed); + return {imm_values[0].imm_u32, imm_values[1].imm_u32, imm_values[2].imm_u32, + imm_values[3].imm_u32}; +} + +inline std::tuple 
+inline std::tuple<s32, s32> ImmValue::S32x2() const {
+    ASSERT(type == Type::U32x2 && is_signed);
+    return {imm_values[0].imm_s32, imm_values[1].imm_s32};
+}
+
+inline std::tuple<s32, s32, s32> ImmValue::S32x3() const {
+    ASSERT(type == Type::U32x3 && is_signed);
+    return {imm_values[0].imm_s32, imm_values[1].imm_s32, imm_values[2].imm_s32};
+}
+
+inline std::tuple<s32, s32, s32, s32> ImmValue::S32x4() const {
+    ASSERT(type == Type::U32x4 && is_signed);
+    return {imm_values[0].imm_s32, imm_values[1].imm_s32, imm_values[2].imm_s32,
+            imm_values[3].imm_s32};
+}
+
+inline std::tuple<f32, f32> ImmValue::F32x2() const {
+    ASSERT(type == Type::F32x2 && is_signed);
+    return {imm_values[0].imm_f32, imm_values[1].imm_f32};
+}
+
+inline std::tuple<f32, f32, f32> ImmValue::F32x3() const {
+    ASSERT(type == Type::F32x3 && is_signed);
+    return {imm_values[0].imm_f32, imm_values[1].imm_f32, imm_values[2].imm_f32};
+}
+
+inline std::tuple<f32, f32, f32, f32> ImmValue::F32x4() const {
+    ASSERT(type == Type::F32x4 && is_signed);
+    return {imm_values[0].imm_f32, imm_values[1].imm_f32, imm_values[2].imm_f32,
+            imm_values[3].imm_f32};
+}
+
+inline std::tuple<f64, f64> ImmValue::F64x2() const {
+    ASSERT(type == Type::F64x2 && is_signed);
+    return {imm_values[0].imm_f64, imm_values[1].imm_f64};
+}
+
+inline std::tuple<f64, f64, f64> ImmValue::F64x3() const {
+    ASSERT(type == Type::F64x3 && is_signed);
+    return {imm_values[0].imm_f64, imm_values[1].imm_f64, imm_values[2].imm_f64};
+}
+
+inline std::tuple<f64, f64, f64, f64> ImmValue::F64x4() const {
+    ASSERT(type == Type::F64x4 && is_signed);
+    return {imm_values[0].imm_f64, imm_values[1].imm_f64, imm_values[2].imm_f64,
+            imm_values[3].imm_f64};
+}
+
+} // namespace Shader::IR
+
+namespace std {
+template <>
+struct hash<Shader::IR::ImmValue> {
+    std::size_t operator()(const Shader::IR::ImmValue& value) const;
+};
+} // namespace std
\ No newline at end of file

From 5536febd73ccb499ccacb41fcac9f1722325e4be Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Thu, 6 Mar 2025 01:09:12 +0100
Subject: [PATCH 15/49] Better documentation

---
 src/shader_recompiler/ir/compute_value/imm_value.h |  3 +++
 src/shader_recompiler/ir/subprogram.cpp            | 11 +++--------
 src/shader_recompiler/ir/subprogram.h              |  5 +++++
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/shader_recompiler/ir/compute_value/imm_value.h b/src/shader_recompiler/ir/compute_value/imm_value.h
index 1a304a4af..7ece9f48e 100644
--- a/src/shader_recompiler/ir/compute_value/imm_value.h
+++ b/src/shader_recompiler/ir/compute_value/imm_value.h
@@ -13,6 +13,9 @@
 
 namespace Shader::IR {
 
+// Like IR::Value, but can only hold immediate values. Additionally, can hold vectors of values.
+// Has arithmetic operations defined for it. Useful for computing a value at shader compile time.
+
 class ImmValue {
 public:
     ImmValue() noexcept = default;
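To make that comment concrete: an ImmValue carries its IR type and signedness next to the payload, so arithmetic folds immediately, and the typed aliases narrow an untyped result with a runtime check. A small illustrative sketch, not part of the patch; the exact signedness an f32 constructor assigns is inferred from the F32() accessor above:

// Illustrative only: fold (2.0f + 3.0f) * 0.5f without emitting IR instructions.
using namespace Shader::IR;

ImmValue FoldHalfSum() {
    const ImmValue a{2.0f};
    const ImmValue b{3.0f};
    const ImmValue half{0.5f};
    const ImmValue folded = (a + b) * half; // an F32 ImmValue holding 2.5f
    const ImmF32 typed{folded};             // ok: type and signedness match
    // const ImmU32 bad{folded};            // would throw InvalidArgument
    return typed;
}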
diff --git a/src/shader_recompiler/ir/subprogram.cpp b/src/shader_recompiler/ir/subprogram.cpp
index a247c8c01..ac69ec61e 100644
--- a/src/shader_recompiler/ir/subprogram.cpp
+++ b/src/shader_recompiler/ir/subprogram.cpp
@@ -7,11 +7,6 @@
 #include "shader_recompiler/ir/post_order.h"
 #include "shader_recompiler/ir/subprogram.h"
 
-// Given an IR program, this class is used to create a subprogram that contains
-// only the blocks and instructions that relevant to a group of given instructions.
-// Taking into account only the given instructions, the instructions that it uses and
-// conditions.
-
 namespace Shader::IR {
 
 SubProgram::SubProgram(Program* super_program, Pools& pools)
@@ -159,7 +154,7 @@ void SubProgram::BuildBlockListAndASL(Program& sub_program) {
             Block* block = cond->GetParent();
             Block* merge_block = AddBlock(orig_asl_node.data.if_node.merge);
             Block* body_block = AddBlock(orig_asl_node.data.if_node.body);
-            asl_node.data.if_node.cond = U1(Value(cond));
+            asl_node.data.if_node.cond = U1(cond);
             asl_node.data.if_node.body = body_block;
             asl_node.data.if_node.merge = merge_block;
             block->AddBranch(body_block);
@@ -198,7 +193,7 @@ void SubProgram::BuildBlockListAndASL(Program& sub_program) {
             Block* block = cond->GetParent();
             Block* merge_block = AddBlock(orig_asl_node.data.repeat.merge);
             Block* loop_header_block = AddBlock(orig_asl_node.data.repeat.loop_header);
-            asl_node.data.repeat.cond = U1(Value(cond));
+            asl_node.data.repeat.cond = U1(cond);
             asl_node.data.repeat.loop_header = loop_header_block;
             asl_node.data.repeat.merge = merge_block;
             block->AddBranch(loop_header_block);
@@ -213,7 +208,7 @@ void SubProgram::BuildBlockListAndASL(Program& sub_program) {
             Block* block = cond->GetParent();
             Block* merge_block = AddBlock(orig_asl_node.data.break_node.merge);
             Block* skip_block = AddBlock(orig_asl_node.data.break_node.skip);
-            asl_node.data.break_node.cond = U1(Value(&block->back()));
+            asl_node.data.break_node.cond = U1(&block->back());
             asl_node.data.break_node.merge = merge_block;
             asl_node.data.break_node.skip = skip_block;
             block->AddBranch(skip_block);
diff --git a/src/shader_recompiler/ir/subprogram.h b/src/shader_recompiler/ir/subprogram.h
index c9cd4ff1f..9e1ad3c91 100644
--- a/src/shader_recompiler/ir/subprogram.h
+++ b/src/shader_recompiler/ir/subprogram.h
@@ -10,6 +10,11 @@
 
 namespace Shader::IR {
 
+// Given an IR program, this class is used to create a subprogram that contains
+// only the blocks and instructions that are relevant to a group of given instructions.
+// It takes into account only the given instructions, the instructions they use, and
+// their conditions.
+
 struct SubProgram {
     SubProgram(Program* super_program, Pools& pools);

From c36b6d6adf5a028a752c30bde8d91a4f024f93db Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Thu, 6 Mar 2025 01:10:58 +0100
Subject: [PATCH 16/49] clang-format

---
 src/shader_recompiler/ir/subprogram.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/shader_recompiler/ir/subprogram.h b/src/shader_recompiler/ir/subprogram.h
index 9e1ad3c91..b14a31e3d 100644
--- a/src/shader_recompiler/ir/subprogram.h
+++ b/src/shader_recompiler/ir/subprogram.h
@@ -14,7 +14,7 @@ namespace Shader::IR {
 // only the blocks and instructions that are relevant to a group of given instructions.
 // It takes into account only the given instructions, the instructions they use, and
 // their conditions.
-
+
 struct SubProgram {
     SubProgram(Program* super_program, Pools& pools);
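Only the constructor is visible in this series, so how a caller drives the extraction is an open question here. A heavily hedged sketch of the intended flow, where AddInst and GetSubProgram are hypothetical names standing in for whatever entry points the final API exposes:

// Hypothetical driver; only SubProgram(Program*, Pools&) is confirmed by this patch.
using namespace Shader::IR;

Program ExtractSlice(Program& program, Pools& pools, Inst* interesting) {
    SubProgram sub{&program, pools};
    sub.AddInst(interesting);   // assumed: mark the instruction whose slice we want
    return sub.GetSubProgram(); // assumed: materialize blocks/ASL reachable from it
}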
From 676b06db0ff1d6a94abd5c762a4b0268492526e5 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Tue, 11 Mar 2025 15:43:06 +0100
Subject: [PATCH 17/49] Squashed commit of the following:

commit 39328be45ed83d252e491b61da5cb4d767ff100d
Author: Lander Gallastegi
Date:   Tue Mar 11 15:40:44 2025 +0100

    Fix trivially copyable

commit f0633525b343dab7ea75cd7c809c936cb67e572f
Author: Lander Gallastegi
Date:   Tue Mar 11 00:29:42 2025 +0100

    Compute value

commit 8c42a014ee925b61c5ea5721423da3211a635eb8
Author: Lander Gallastegi
Date:   Tue Mar 11 00:29:31 2025 +0100

    Add missing operations
---
 CMakeLists.txt                                |   2 +
 .../ir/compute_value/compute.cpp              | 503 ++++++++++++++++++
 .../ir/compute_value/compute.h                |  18 +
 .../ir/compute_value/imm_value.cpp            | 462 +++++++++++++++-
 .../ir/compute_value/imm_value.h              |  31 ++
 5 files changed, 997 insertions(+), 19 deletions(-)
 create mode 100644 src/shader_recompiler/ir/compute_value/compute.cpp
 create mode 100644 src/shader_recompiler/ir/compute_value/compute.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 041642840..5b723f76c 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -843,6 +843,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
     src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
     src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp
     src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
+    src/shader_recompiler/ir/compute_value/compute.cpp
+    src/shader_recompiler/ir/compute_value/compute.h
     src/shader_recompiler/ir/compute_value/imm_value.cpp
     src/shader_recompiler/ir/compute_value/imm_value.h
     src/shader_recompiler/ir/abstract_syntax_list.cpp
diff --git a/src/shader_recompiler/ir/compute_value/compute.cpp b/src/shader_recompiler/ir/compute_value/compute.cpp
new file mode 100644
index 000000000..14e7518f4
--- /dev/null
+++ b/src/shader_recompiler/ir/compute_value/compute.cpp
@@ -0,0 +1,503 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <algorithm>
+#include <iterator>
+#include <tuple>
+#include "shader_recompiler/ir/compute_value/compute.h"
+
+namespace Shader::IR {
+
+template <size_t N, size_t Level, typename Func, typename OutputIt, typename... ArgLists>
+static void CartesianInvokeImpl(Func func, OutputIt out_it,
+                                std::tuple<typename ArgLists::const_iterator...>& arglists_its,
+                                const std::tuple<const ArgLists&...>& arglists_tuple) {
+    if constexpr (Level == N) {
+        auto get_tuple = [&]<size_t... I>(std::index_sequence<I...>) {
+            return std::forward_as_tuple(*std::get<I>(arglists_its)...);
+        };
+        *out_it++ = std::move(std::apply(func, get_tuple(std::make_index_sequence<N>{})));
+        return;
+    } else {
+        const auto& arglist = std::get<Level>(arglists_tuple);
+        for (auto it = arglist.begin(); it != arglist.end(); ++it) {
+            std::get<Level>(arglists_its) = it;
+            CartesianInvokeImpl<N, Level + 1>(func, out_it, arglists_its, arglists_tuple);
+        }
+    }
+}
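CartesianInvokeImpl recurses one argument list per level; at the bottom level it applies the functor to one element picked from each list and writes the result through the output iterator. The same pattern, spelled out on plain vectors as a self-contained sketch:

// Same idea on plain ints: apply a functor over the Cartesian product
// {1, 2} x {10, 20} and collect the four sums.
#include <cstdio>
#include <vector>

int main() {
    const std::vector<int> lhs{1, 2};
    const std::vector<int> rhs{10, 20};
    std::vector<int> sums;
    for (const int a : lhs) {
        for (const int b : rhs) {
            sums.push_back(a + b); // what CartesianInvoke does generically
        }
    }
    for (const int s : sums) {
        std::printf("%d\n", s); // prints 11, 21, 12, 22
    }
}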
+template <typename Func, typename OutputIt, typename... ArgLists>
+static void CartesianInvoke(Func func, OutputIt out_it, const ArgLists&... arg_lists) {
+    constexpr size_t N = sizeof...(ArgLists);
+    const std::tuple<const ArgLists&...> arglists_tuple = std::forward_as_tuple(arg_lists...);
+
+    std::tuple<typename ArgLists::const_iterator...> arglists_it;
+    CartesianInvokeImpl<N, 0>(func, out_it, arglists_it, arglists_tuple);
+}
+
+static void SetSigned(ImmValueList& values, bool is_signed) {
+    for (auto& value : values) {
+        value.SetSigned(is_signed);
+    }
+}
+
+static void OperationAbs(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) {
+    ImmValueList args;
+    ComputeImmValues(inst->Arg(0), args, cache);
+
+    const auto op = [](const ImmValue& a) {
+        return a.abs();
+    };
+
+    std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op);
+}
+
+static void OperationAdd(Inst* inst, bool is_signed, ImmValueList& inst_values,
+                         ComputeImmValuesCache& cache) {
+    ImmValueList args0, args1;
+    ComputeImmValues(inst->Arg(0), args0, cache);
+    ComputeImmValues(inst->Arg(1), args1, cache);
+
+    const auto op = [](const ImmValue& a, const ImmValue& b) {
+        return a + b;
+    };
+
+    SetSigned(args0, is_signed);
+    SetSigned(args1, is_signed);
+    CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1);
+}
+
+static void OperationSub(Inst* inst, bool is_signed, ImmValueList& inst_values,
+                         ComputeImmValuesCache& cache) {
+    ImmValueList args0, args1;
+    ComputeImmValues(inst->Arg(0), args0, cache);
+    ComputeImmValues(inst->Arg(1), args1, cache);
+
+    const auto op = [](const ImmValue& a, const ImmValue& b) {
+        return a - b;
+    };
+
+    SetSigned(args0, is_signed);
+    SetSigned(args1, is_signed);
+    CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1);
+}
+
+static void OperationFma(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) {
+    ImmValueList args0, args1, args2;
+    ComputeImmValues(inst->Arg(0), args0, cache);
+    ComputeImmValues(inst->Arg(1), args1, cache);
+    ComputeImmValues(inst->Arg(2), args2, cache);
+
+    const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c) {
+        return ImmValue::fma(a, b, c);
+    };
+
+    CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, args2);
+}
+
+static void OperationMin(Inst* inst, bool is_signed, ImmValueList& inst_values,
+                         ComputeImmValuesCache& cache) {
+    ImmValueList args0, args1, is_legacy_args;
+    ComputeImmValues(inst->Arg(0), args0, cache);
+    ComputeImmValues(inst->Arg(1), args1, cache);
+    if (inst->NumArgs() > 2) {
+        ComputeImmValues(inst->Arg(2), is_legacy_args, cache);
+    } else {
+        is_legacy_args.insert(ImmValue(false));
+    }
+
+    const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& is_legacy) {
+        if (is_legacy.U1()) {
+            if (a.isnan())
+                return b;
+            if (b.isnan())
+                return a;
+        }
+        return std::min(a, b);
+    };
+
+    SetSigned(args0, is_signed);
+    SetSigned(args1, is_signed);
+    CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1,
+                    is_legacy_args);
+}
+
+static void OperationMax(Inst* inst, bool is_signed, ImmValueList& inst_values,
+                         ComputeImmValuesCache& cache) {
+    ImmValueList args0, args1, is_legacy_args;
+    ComputeImmValues(inst->Arg(0), args0, cache);
+    ComputeImmValues(inst->Arg(1), args1, cache);
+    if (inst->NumArgs() > 2) {
+        ComputeImmValues(inst->Arg(2), is_legacy_args, cache);
+    } else {
+        is_legacy_args.insert(ImmValue(false));
+    }
+
+    const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& is_legacy) {
+        if (is_legacy.U1()) {
+            if (a.isnan())
+                return b;
+            if (b.isnan())
+                return a;
+        }
+        return std::max(a, b);
+    };
+
+    SetSigned(args0, is_signed);
SetSigned(args1, is_signed); + CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, is_legacy_args); +} + +static void OperationMul(Inst* inst, bool is_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args0, args1; + ComputeImmValues(inst->Arg(0), args0, cache); + ComputeImmValues(inst->Arg(1), args1, cache); + + const auto op = [](const ImmValue& a, const ImmValue& b) { + return a * b; + }; + + SetSigned(args0, is_signed); + SetSigned(args1, is_signed); + CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); +} + +static void OperationDiv(Inst* inst, bool is_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args0, args1; + ComputeImmValues(inst->Arg(0), args0, cache); + ComputeImmValues(inst->Arg(1), args1, cache); + + const auto op = [](const ImmValue& a, const ImmValue& b) { + return a / b; + }; + + SetSigned(args0, is_signed); + SetSigned(args1, is_signed); + CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); +} + +static void OperationNeg(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return -a; + }; + + SetSigned(args, true); + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationRecip(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return a.recip(); + }; + + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationRecipSqrt(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return a.rsqrt(); + }; + + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationSqrt(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return a.sqrt(); + }; + + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationSin(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return a.sin(); + }; + + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationExp2(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return a.exp2(); + }; + + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationLdexp(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args0, args1; + ComputeImmValues(inst->Arg(0), args0, cache); + ComputeImmValues(inst->Arg(1), args1, cache); + + const auto op = [](const ImmValue& a, const ImmValue& b) { + return a.ldexp(b); + }; + + CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); +} + 
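The is_legacy path in OperationMin/OperationMax above encodes the "legacy" min/max behavior this recompiler targets: a NaN operand is discarded in favor of the other side, whereas std::min with a NaN first argument returns the NaN itself because the comparison is false. A standalone sketch of why the explicit NaN checks are not redundant:

// Legacy min vs. std::min in the presence of NaN (IEEE floats assumed).
#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>

int main() {
    const float nan = std::numeric_limits<float>::quiet_NaN();
    const auto legacy_min = [](float a, float b) {
        if (std::isnan(a))
            return b; // NaN loses: the other operand wins
        if (std::isnan(b))
            return a;
        return std::min(a, b);
    };
    assert(legacy_min(nan, 1.0f) == 1.0f);
    // std::min(nan, 1.0f) evaluates (1.0f < nan) == false and returns its
    // first argument, i.e. NaN.
    assert(std::isnan(std::min(nan, 1.0f)));
}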
+static void OperationCos(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return a.cos(); + }; + + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationLog2(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return a.log2(); + }; + + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationClamp(Inst* inst, bool is_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args0, args1, args2; + ComputeImmValues(inst->Arg(0), args0, cache); + ComputeImmValues(inst->Arg(1), args1, cache); + ComputeImmValues(inst->Arg(2), args2, cache); + + const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c) { + return a.clamp(b, c); + }; + + SetSigned(args0, is_signed); + SetSigned(args1, is_signed); + SetSigned(args2, is_signed); + CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, args2); +} + +static void OperationRound(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return a.round(); + }; + + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationFloor(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return a.floor(); + }; + + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationCeil(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return a.ceil(); + }; + + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationTrunc(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return a.trunc(); + }; + + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationFract(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args; + ComputeImmValues(inst->Arg(0), args, cache); + + const auto op = [](const ImmValue& a) { + return a.fract(); + }; + std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); +} + +static void OperationShiftLeft(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args0, args1; + ComputeImmValues(inst->Arg(0), args0, cache); + ComputeImmValues(inst->Arg(1), args1, cache); + + const auto op = [](const ImmValue& a, const ImmValue& b) { + return a << b; + }; + + SetSigned(args1, false); + CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); +} + +static void OperationShiftRight(Inst* inst, bool is_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args0, args1; + 
ComputeImmValues(inst->Arg(0), args0, cache);
+    ComputeImmValues(inst->Arg(1), args1, cache);
+
+    const auto op = [](const ImmValue& a, const ImmValue& b) {
+        return a >> b;
+    };
+
+    SetSigned(args0, is_signed);
+    SetSigned(args1, false);
+    CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1);
+}
+
+static void OperationBitwiseNot(Inst* inst, ImmValueList& inst_values,
+                                ComputeImmValuesCache& cache) {
+    ImmValueList args;
+    ComputeImmValues(inst->Arg(0), args, cache);
+
+    const auto op = [](const ImmValue& a) {
+        return ~a;
+    };
+
+    std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op);
+}
+
+static void OperationBitwiseAnd(Inst* inst, ImmValueList& inst_values,
+                                ComputeImmValuesCache& cache) {
+    ImmValueList args0, args1;
+    ComputeImmValues(inst->Arg(0), args0, cache);
+    ComputeImmValues(inst->Arg(1), args1, cache);
+
+    const auto op = [](const ImmValue& a, const ImmValue& b) {
+        return a & b;
+    };
+
+    CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1);
+}
+
+static void OperationBitwiseOr(Inst* inst, ImmValueList& inst_values,
+                               ComputeImmValuesCache& cache) {
+    ImmValueList args0, args1;
+    ComputeImmValues(inst->Arg(0), args0, cache);
+    ComputeImmValues(inst->Arg(1), args1, cache);
+
+    const auto op = [](const ImmValue& a, const ImmValue& b) {
+        return a | b;
+    };
+
+    CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1);
+}
+
+static void OperationBitwiseXor(Inst* inst, ImmValueList& inst_values,
+                                ComputeImmValuesCache& cache) {
+    ImmValueList args0, args1;
+    ComputeImmValues(inst->Arg(0), args0, cache);
+    ComputeImmValues(inst->Arg(1), args1, cache);
+
+    const auto op = [](const ImmValue& a, const ImmValue& b) {
+        return a ^ b;
+    };
+
+    CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1);
+}
+
+static void OperationConvert(Inst* inst, bool is_signed, Type new_type, bool new_signed,
+                             ImmValueList& inst_values, ComputeImmValuesCache& cache) {
+    ImmValueList args;
+    ComputeImmValues(inst->Arg(0), args, cache);
+
+    const auto op = [new_type, new_signed](const ImmValue& a) {
+        return a.Convert(new_type, new_signed);
+    };
+
+    SetSigned(args, is_signed);
+    std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op);
+}
+
+static void OperationBitCast(Inst* inst, Type new_type, bool new_signed,
+                             ImmValueList& inst_values, ComputeImmValuesCache& cache) {
+    ImmValueList args;
+    ComputeImmValues(inst->Arg(0), args, cache);
+
+    const auto op = [new_type, new_signed](const ImmValue& a) {
+        return a.Bitcast(new_type, new_signed);
+    };
+
+    std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op);
+}
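OperationBitCast reinterprets the stored payload bits under a new type, as opposed to OperationConvert, which changes the value representation. For example, the f32 1.0f viewed as a u32 is 0x3f800000, while a converting cast yields the integer 1. A standalone equivalent using std::bit_cast (C++20):

// Reinterpretation vs. conversion on a single float.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
    // Bitcast: the payload bits are untouched.
    static_assert(std::bit_cast<std::uint32_t>(1.0f) == 0x3f800000u);
    // Convert: the value is re-encoded in the destination type.
    assert(static_cast<std::uint32_t>(1.0f) == 1u);
}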
+template <size_t N>
+static void OperationCompositeConstruct(Inst* inst, ImmValueList& inst_values,
+                                        ComputeImmValuesCache& cache) {
+    std::array<ImmValueList, N> args;
+    for (size_t i = 0; i < N; ++i) {
+        ComputeImmValues(inst->Arg(i), args[i], cache);
+    }
+
+    const auto op = []<typename... Args>(const Args&... args) {
+        return ImmValue(args...);
+    };
+
+    const auto call_cartesian = [&]<size_t... I>(std::index_sequence<I...>) {
+        CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args[I]...);
+    };
+    call_cartesian(std::make_index_sequence<N>{});
+}
+
+static void OperationCompositeExtract(Inst* inst, ImmValueList& inst_values,
+                                      ComputeImmValuesCache& cache) {
+    ImmValueList args0, args1;
+    ComputeImmValues(inst->Arg(0), args0, cache);
+    ComputeImmValues(inst->Arg(1), args1, cache);
+
+    const auto op = [](const ImmValue& a, const ImmValue& b) {
+        return a.Extract(b);
+    };
+
+    SetSigned(args1, false);
+    CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1);
+}
+
+static void DoInstructionOperation(Inst* inst, ImmValueList& inst_values,
+                                   ComputeImmValuesCache& cache) {
+    switch (inst->GetOpcode()) {
+    default:
+        break;
+    }
+}
+
+void ComputeImmValues(const Value& value, ImmValueList& values, ComputeImmValuesCache& cache) {
+    Value resolved = value.Resolve();
+    if (ImmValue::IsSupportedValue(resolved)) {
+        values.insert(ImmValue(resolved));
+        return;
+    }
+    if (resolved.Type() != Type::Opaque) {
+        return;
+    }
+    Inst* inst = resolved.InstRecursive();
+    auto it = cache.find(inst);
+    if (it != cache.end()) {
+        values.insert(it->second.begin(), it->second.end());
+        return;
+    }
+    auto& inst_values = cache.emplace(inst, ImmValueList{}).first->second;
+    if (inst->GetOpcode() == Opcode::Phi) {
+        for (size_t i = 0; i < inst->NumArgs(); ++i) {
+            ComputeImmValues(inst->Arg(i), inst_values, cache);
+        }
+    } else {
+        DoInstructionOperation(inst, inst_values, cache);
+    }
+    values.insert(inst_values.begin(), inst_values.end());
+}
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/ir/compute_value/compute.h b/src/shader_recompiler/ir/compute_value/compute.h
new file mode 100644
index 000000000..fbfe46575
--- /dev/null
+++ b/src/shader_recompiler/ir/compute_value/compute.h
@@ -0,0 +1,18 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <boost/container/flat_map.hpp>
+#include <boost/container/flat_set.hpp>
+#include "shader_recompiler/ir/compute_value/imm_value.h"
+#include "shader_recompiler/ir/value.h"
+
+namespace Shader::IR {
+
+using ImmValueList = boost::container::flat_set<ImmValue>;
+using ComputeImmValuesCache = boost::container::flat_map<Inst*, ImmValueList>;
+
+void ComputeImmValues(const Value& value, ImmValueList& values, ComputeImmValuesCache& cache);
+
+} // namespace Shader::IR
diff --git a/src/shader_recompiler/ir/compute_value/imm_value.cpp b/src/shader_recompiler/ir/compute_value/imm_value.cpp
index f222ea009..6c433c244 100644
--- a/src/shader_recompiler/ir/compute_value/imm_value.cpp
+++ b/src/shader_recompiler/ir/compute_value/imm_value.cpp
@@ -7,29 +7,28 @@
 namespace Shader::IR {
 
 ImmValue::ImmValue(const IR::Value& value) noexcept {
-    IR::Value resolved = value.Resolve();
-    type = resolved.Type();
+    type = value.Type();
     switch (type) {
     case Type::U1:
-        imm_values[0].imm_u1 = resolved.U1();
+        imm_values[0].imm_u1 = value.U1();
         break;
     case Type::U8:
-        imm_values[0].imm_u8 = resolved.U8();
+        imm_values[0].imm_u8 = value.U8();
         break;
     case Type::U16:
-        imm_values[0].imm_u16 = resolved.U16();
+        imm_values[0].imm_u16 = value.U16();
         break;
     case Type::U32:
-        imm_values[0].imm_u32 = resolved.U32();
+        imm_values[0].imm_u32 = value.U32();
         break;
     case Type::F32:
-        imm_values[0].imm_f32 = resolved.F32();
+        imm_values[0].imm_f32 = value.F32();
         break;
     case Type::U64:
-        imm_values[0].imm_u64 = resolved.U64();
+        imm_values[0].imm_u64 = value.U64();
        break;
     case Type::F64:
-        imm_values[0].imm_f64 = resolved.F64();
+
imm_values[0].imm_f64 = value.F64(); break; default: UNREACHABLE_MSG("Invalid type {}", type); @@ -160,6 +159,44 @@ ImmValue::ImmValue(f64 value1, f64 value2, f64 value3, f64 value4) noexcept imm_values[3].imm_f64 = value4; } +ImmValue::ImmValue(const ImmValue& value1, const ImmValue& value2) noexcept + : type{value1.type}, is_signed{value1.is_signed} { + ASSERT(value1.type == value2.type && value1.is_signed == value2.is_signed); + switch (value1.Dimensions()) { + case 1: + imm_values[0] = value1.imm_values[0]; + imm_values[1] = value2.imm_values[0]; + break; + case 2: + imm_values[0] = value1.imm_values[0]; + imm_values[1] = value1.imm_values[1]; + imm_values[2] = value2.imm_values[0]; + imm_values[3] = value2.imm_values[1]; + break; + default: + UNREACHABLE_MSG("Invalid dimensions {}", value1.Dimensions()); + } +} + +ImmValue::ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3) noexcept + : type{value1.type}, is_signed{value1.is_signed} { + ASSERT(value1.type == value2.type && value1.type == value3.type && value1.is_signed == value2.is_signed && + value1.is_signed == value3.is_signed && value1.Dimensions() == 1); + imm_values[0] = value1.imm_values[0]; + imm_values[1] = value2.imm_values[0]; + imm_values[2] = value3.imm_values[0]; +} + +ImmValue::ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3, const ImmValue& value4) noexcept + : type{value1.type}, is_signed{value1.is_signed} { + ASSERT(value1.type == value2.type && value1.type == value3.type && value1.type == value4.type && value1.is_signed == value2.is_signed && + value1.is_signed == value3.is_signed && value1.is_signed == value4.is_signed && value1.Dimensions() == 1); + imm_values[0] = value1.imm_values[0]; + imm_values[1] = value2.imm_values[0]; + imm_values[2] = value3.imm_values[0]; + imm_values[3] = value4.imm_values[0]; +} + IR::Type ImmValue::BaseType() const noexcept { switch (type) { case Type::U1: @@ -229,6 +266,102 @@ void ImmValue::SameSignAs(const ImmValue& other) noexcept { SetSigned(other.IsSigned()); } +ImmValue ImmValue::Convert(IR::Type new_type, bool new_signed) const noexcept { + switch (new_type) { + case Type::U16: { + switch (type) { + case Type::U32: + return ImmValue(static_cast(imm_values[0].imm_u32)); + default: + break; + } + break; + } + case Type::U32: { + if (new_signed) { + switch (type) { + case Type::F32: + return ImmValue(static_cast(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(static_cast(imm_values[0].imm_f64)); + default: + break; + } + } else { + switch (type) { + case Type::U16: + return ImmValue(static_cast(imm_values[0].imm_u16)); + case Type::U32: + if (is_signed) { + return ImmValue(static_cast(imm_values[0].imm_s32)); + } + break; + case Type::F32: + return ImmValue(static_cast(imm_values[0].imm_f32)); + default: + break; + } + } + } + case Type::F32: { + switch (type) { + case Type::U16: + return ImmValue(static_cast(imm_values[0].imm_u16)); + case Type::U32: + if (is_signed) { + return ImmValue(static_cast(imm_values[0].imm_s32)); + } else { + return ImmValue(static_cast(imm_values[0].imm_u32)); + } + case Type::F64: + return ImmValue(static_cast(imm_values[0].imm_f64)); + default: + break; + } + break; + } + case Type::F64: { + switch (type) { + case Type::F32: + return ImmValue(static_cast(imm_values[0].imm_f32)); + default: + break; + } + break; + } + default: + break; + } + UNREACHABLE_MSG("Invalid conversion from {} {} to {} {}", is_signed ? "signed" : "unsigned", + type, new_signed ? 
"signed" : "unsigned", new_type); +} + +ImmValue ImmValue::Bitcast(IR::Type new_type, bool new_signed) const noexcept { + ImmValue result; + result.type = new_type; + result.is_signed = new_signed; + result.imm_values = imm_values; + ASSERT(Dimensions() == result.Dimensions()); + return result; +} + +ImmValue ImmValue::Extract(const ImmValue& index) const noexcept { + ASSERT(index.type == Type::U32 && !index.is_signed && index.imm_values[0].imm_u32 < Dimensions()); + ImmValue result; + result.type = BaseType(); + result.is_signed = IsSigned(); + result.imm_values[0] = imm_values[index.imm_values[0].imm_u32]; + return result; +} + +ImmValue ImmValue::Insert(const ImmValue& value, const ImmValue& index) const noexcept { + ASSERT(index.type == Type::U32 && !index.is_signed && index.imm_values[0].imm_u32 < Dimensions()); + ASSERT(value.type == BaseType() && value.IsSigned() == IsSigned()); + ImmValue result = *this; + result.imm_values[index.imm_values[0].imm_u32] = value.imm_values[0]; + return result; +} + bool ImmValue::operator==(const ImmValue& other) const noexcept { if (type != other.type) { return false; @@ -747,24 +880,24 @@ ImmValue ImmValue::operator^(const ImmValue& other) const noexcept { } ImmValue ImmValue::operator<<(const ImmValue& other) const noexcept { - ASSERT(type == other.type); + ASSERT(other.type == Type::U32 && other.Dimensions() == 1); switch (type) { case Type::U1: return ImmValue(imm_values[0].imm_u1 << other.imm_values[0].imm_u1); case Type::U8: - return is_signed && other.is_signed + return is_signed ? ImmValue(imm_values[0].imm_s8 << other.imm_values[0].imm_s8) : ImmValue(imm_values[0].imm_u8 << other.imm_values[0].imm_u8); case Type::U16: - return is_signed && other.is_signed + return is_signed ? ImmValue(imm_values[0].imm_s16 << other.imm_values[0].imm_s16) : ImmValue(imm_values[0].imm_u16 << other.imm_values[0].imm_u16); case Type::U32: - return is_signed && other.is_signed + return is_signed ? ImmValue(imm_values[0].imm_s32 << other.imm_values[0].imm_s32) : ImmValue(imm_values[0].imm_u32 << other.imm_values[0].imm_u32); case Type::U64: - return is_signed && other.is_signed + return is_signed ? ImmValue(imm_values[0].imm_s64 << other.imm_values[0].imm_s64) : ImmValue(imm_values[0].imm_u64 << other.imm_values[0].imm_u64); default: @@ -773,24 +906,24 @@ ImmValue ImmValue::operator<<(const ImmValue& other) const noexcept { } ImmValue ImmValue::operator>>(const ImmValue& other) const noexcept { - ASSERT(type == other.type); + ASSERT(other.type == Type::U32 && other.Dimensions() == 1); switch (type) { case Type::U1: return ImmValue(imm_values[0].imm_u1 >> other.imm_values[0].imm_u1); case Type::U8: - return is_signed && other.is_signed + return is_signed ? ImmValue(imm_values[0].imm_s8 >> other.imm_values[0].imm_s8) : ImmValue(imm_values[0].imm_u8 >> other.imm_values[0].imm_u8); case Type::U16: - return is_signed && other.is_signed + return is_signed ? ImmValue(imm_values[0].imm_s16 >> other.imm_values[0].imm_s16) : ImmValue(imm_values[0].imm_u16 >> other.imm_values[0].imm_u16); case Type::U32: - return is_signed && other.is_signed + return is_signed ? ImmValue(imm_values[0].imm_s32 >> other.imm_values[0].imm_s32) : ImmValue(imm_values[0].imm_u32 >> other.imm_values[0].imm_u32); case Type::U64: - return is_signed && other.is_signed + return is_signed ? 
ImmValue(imm_values[0].imm_s64 >> other.imm_values[0].imm_s64) : ImmValue(imm_values[0].imm_u64 >> other.imm_values[0].imm_u64); default: @@ -1049,6 +1182,297 @@ ImmValue& ImmValue::operator>>=(const ImmValue& other) noexcept { return *this; } +ImmValue ImmValue::abs() const noexcept { + switch (type) { + case Type::U8: + return is_signed ? ImmValue(std::abs(imm_values[0].imm_s8)) + : ImmValue(imm_values[0].imm_u8); + case Type::U16: + return is_signed ? ImmValue(std::abs(imm_values[0].imm_s16)) + : ImmValue(imm_values[0].imm_u16); + case Type::U32: + return is_signed ? ImmValue(std::abs(imm_values[0].imm_s32)) + : ImmValue(imm_values[0].imm_u32); + case Type::U64: + return is_signed ? ImmValue(std::abs(imm_values[0].imm_s64)) + : ImmValue(imm_values[0].imm_u64); + case Type::F32: + return ImmValue(std::abs(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(std::abs(imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::recip() const noexcept { + switch (type) { + case Type::F32: + return ImmValue(1.0f / imm_values[0].imm_f32); + case Type::F64: + return ImmValue(1.0 / imm_values[0].imm_f64); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::sqrt() const noexcept { + switch (type) { + case Type::F32: + return ImmValue(std::sqrt(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(std::sqrt(imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::rsqrt() const noexcept { + switch (type) { + case Type::F32: + return ImmValue(1.0f / std::sqrt(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(1.0 / std::sqrt(imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::sin() const noexcept { + switch (type) { + case Type::F32: + return ImmValue(std::sin(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(std::sin(imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::cos() const noexcept { + switch (type) { + case Type::F32: + return ImmValue(std::cos(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(std::cos(imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::exp2() const noexcept { + switch (type) { + case Type::F32: + return ImmValue(std::exp2(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(std::exp2(imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::ldexp(const ImmValue& exp) const noexcept { + ASSERT(type == exp.type); + switch (type) { + case Type::F32: + return ImmValue(std::ldexp(imm_values[0].imm_f32, exp.imm_values[0].imm_s32)); + case Type::F64: + return ImmValue(std::ldexp(imm_values[0].imm_f64, exp.imm_values[0].imm_s32)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::log2() const noexcept { + switch (type) { + case Type::F32: + return ImmValue(std::log2(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(std::log2(imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::clamp(const ImmValue& min, const ImmValue& max) const noexcept { + ASSERT(type == min.type && min.type == max.type); + switch (type) { + case Type::U8: + return is_signed && min.is_signed && max.is_signed + ? 
ImmValue(std::clamp(imm_values[0].imm_s8, min.imm_values[0].imm_s8, + max.imm_values[0].imm_s8)) + : ImmValue(std::clamp(imm_values[0].imm_u8, min.imm_values[0].imm_u8, + max.imm_values[0].imm_u8)); + case Type::U16: + return is_signed && min.is_signed && max.is_signed + ? ImmValue(std::clamp(imm_values[0].imm_s16, min.imm_values[0].imm_s16, + max.imm_values[0].imm_s16)) + : ImmValue(std::clamp(imm_values[0].imm_u16, min.imm_values[0].imm_u16, + max.imm_values[0].imm_u16)); + case Type::U32: + return is_signed && min.is_signed && max.is_signed + ? ImmValue(std::clamp(imm_values[0].imm_s32, min.imm_values[0].imm_s32, + max.imm_values[0].imm_s32)) + : ImmValue(std::clamp(imm_values[0].imm_u32, min.imm_values[0].imm_u32, + max.imm_values[0].imm_u32)); + case Type::U64: + return is_signed && min.is_signed && max.is_signed + ? ImmValue(std::clamp(imm_values[0].imm_s64, min.imm_values[0].imm_s64, + max.imm_values[0].imm_s64)) + : ImmValue(std::clamp(imm_values[0].imm_u64, min.imm_values[0].imm_u64, + max.imm_values[0].imm_u64)); + case Type::F32: + return ImmValue(std::clamp(imm_values[0].imm_f32, min.imm_values[0].imm_f32, + max.imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(std::clamp(imm_values[0].imm_f64, min.imm_values[0].imm_f64, + max.imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::floor() const noexcept { + switch (type) { + case Type::F32: + return ImmValue(std::floor(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(std::floor(imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::ceil() const noexcept { + switch (type) { + case Type::F32: + return ImmValue(std::ceil(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(std::ceil(imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::round() const noexcept { + switch (type) { + case Type::F32: + return ImmValue(std::round(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(std::round(imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::trunc() const noexcept { + switch (type) { + case Type::F32: + return ImmValue(std::trunc(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(std::trunc(imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::fract() const noexcept { + switch (type) { + case Type::F32: + return ImmValue(imm_values[0].imm_f32 - std::floor(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(imm_values[0].imm_f64 - std::floor(imm_values[0].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +bool ImmValue::isnan() const noexcept { + switch (type) { + case Type::F32: + return std::isnan(imm_values[0].imm_f32); + case Type::F64: + return std::isnan(imm_values[0].imm_f64); + case Type::F32x2: + return std::isnan(imm_values[0].imm_f32) || std::isnan(imm_values[1].imm_f32); + case Type::F64x2: + return std::isnan(imm_values[0].imm_f64) || std::isnan(imm_values[1].imm_f64); + case Type::F32x3: + return std::isnan(imm_values[0].imm_f32) || std::isnan(imm_values[1].imm_f32) || + std::isnan(imm_values[2].imm_f32); + case Type::F64x3: + return std::isnan(imm_values[0].imm_f64) || std::isnan(imm_values[1].imm_f64) || + std::isnan(imm_values[2].imm_f64); + case Type::F32x4: + return std::isnan(imm_values[0].imm_f32) || std::isnan(imm_values[1].imm_f32) || + 
std::isnan(imm_values[2].imm_f32) || std::isnan(imm_values[3].imm_f32); + case Type::F64x4: + return std::isnan(imm_values[0].imm_f64) || std::isnan(imm_values[1].imm_f64) || + std::isnan(imm_values[2].imm_f64) || std::isnan(imm_values[3].imm_f64); + default: + UNREACHABLE_MSG("Invalid type {}", type); + } +} + +ImmValue ImmValue::fma(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept { + ASSERT(a.type == b.type && b.type == c.type); + switch (a.type) { + case Type::F32: + return ImmValue( + std::fma(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32, c.imm_values[0].imm_f32)); + case Type::F64: + return ImmValue( + std::fma(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64, c.imm_values[0].imm_f64)); + case Type::F32x2: + return ImmValue( + std::fma(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32, c.imm_values[0].imm_f32), + std::fma(a.imm_values[1].imm_f32, b.imm_values[1].imm_f32, c.imm_values[1].imm_f32)); + case Type::F64x2: + return ImmValue( + std::fma(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64, c.imm_values[0].imm_f64), + std::fma(a.imm_values[1].imm_f64, b.imm_values[1].imm_f64, c.imm_values[1].imm_f64)); + case Type::F32x3: + return ImmValue( + std::fma(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32, c.imm_values[0].imm_f32), + std::fma(a.imm_values[1].imm_f32, b.imm_values[1].imm_f32, c.imm_values[1].imm_f32), + std::fma(a.imm_values[2].imm_f32, b.imm_values[2].imm_f32, c.imm_values[2].imm_f32)); + case Type::F64x3: + return ImmValue( + std::fma(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64, c.imm_values[0].imm_f64), + std::fma(a.imm_values[1].imm_f64, b.imm_values[1].imm_f64, c.imm_values[1].imm_f64), + std::fma(a.imm_values[2].imm_f64, b.imm_values[2].imm_f64, c.imm_values[2].imm_f64)); + case Type::F32x4: + return ImmValue( + std::fma(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32, c.imm_values[0].imm_f32), + std::fma(a.imm_values[1].imm_f32, b.imm_values[1].imm_f32, c.imm_values[1].imm_f32), + std::fma(a.imm_values[2].imm_f32, b.imm_values[2].imm_f32, c.imm_values[2].imm_f32), + std::fma(a.imm_values[3].imm_f32, b.imm_values[3].imm_f32, c.imm_values[3].imm_f32)); + case Type::F64x4: + return ImmValue( + std::fma(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64, c.imm_values[0].imm_f64), + std::fma(a.imm_values[1].imm_f64, b.imm_values[1].imm_f64, c.imm_values[1].imm_f64), + std::fma(a.imm_values[2].imm_f64, b.imm_values[2].imm_f64, c.imm_values[2].imm_f64), + std::fma(a.imm_values[3].imm_f64, b.imm_values[3].imm_f64, c.imm_values[3].imm_f64)); + default: + UNREACHABLE_MSG("Invalid type {}", a.type); + } +} + +bool ImmValue::IsSupportedValue(const IR::Value& value) noexcept { + switch (value.Type()) { + case IR::Type::U1: + case IR::Type::U8: + case IR::Type::U16: + case IR::Type::U32: + case IR::Type::U64: + case IR::Type::F32: + case IR::Type::F64: + return true; + default: + return false; + } +} + } // namespace Shader::IR namespace std { diff --git a/src/shader_recompiler/ir/compute_value/imm_value.h b/src/shader_recompiler/ir/compute_value/imm_value.h index 7ece9f48e..78696d83a 100644 --- a/src/shader_recompiler/ir/compute_value/imm_value.h +++ b/src/shader_recompiler/ir/compute_value/imm_value.h @@ -19,6 +19,7 @@ namespace Shader::IR { class ImmValue { public: ImmValue() noexcept = default; + ImmValue(const ImmValue& value) noexcept = default; explicit ImmValue(const IR::Value& value) noexcept; explicit ImmValue(bool value) noexcept; explicit ImmValue(u8 value) noexcept; @@ -43,6 +44,9 @@ public: ImmValue(f64 value1, f64 value2) noexcept; 
ImmValue(f64 value1, f64 value2, f64 value3) noexcept; ImmValue(f64 value1, f64 value2, f64 value3, f64 value4) noexcept; + ImmValue(const ImmValue& value1, const ImmValue& value2) noexcept; + ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3) noexcept; + ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3, const ImmValue& value4) noexcept; [[nodiscard]] bool IsEmpty() const noexcept; [[nodiscard]] IR::Type Type() const noexcept; @@ -53,6 +57,11 @@ public: void SetSigned(bool signed_) noexcept; void SameSignAs(const ImmValue& other) noexcept; + [[nodiscard]] ImmValue Convert(IR::Type new_type, bool new_signed) const noexcept; + [[nodiscard]] ImmValue Bitcast(IR::Type new_type, bool new_signed) const noexcept; + [[nodiscard]] ImmValue Extract(const ImmValue& index) const noexcept; + [[nodiscard]] ImmValue Insert(const ImmValue& value, const ImmValue& index) const noexcept; + [[nodiscard]] bool U1() const; [[nodiscard]] u8 U8() const; [[nodiscard]] s8 S8() const; @@ -78,6 +87,8 @@ public: [[nodiscard]] std::tuple F64x3() const; [[nodiscard]] std::tuple F64x4() const; + ImmValue& operator=(const ImmValue& value) noexcept = default; + [[nodiscard]] bool operator==(const ImmValue& other) const noexcept; [[nodiscard]] bool operator!=(const ImmValue& other) const noexcept; [[nodiscard]] bool operator<(const ImmValue& other) const noexcept; @@ -117,6 +128,26 @@ public: ImmValue& operator<<=(const ImmValue& other) noexcept; ImmValue& operator>>=(const ImmValue& other) noexcept; + [[nodiscard]] ImmValue abs() const noexcept; + [[nodiscard]] ImmValue recip() const noexcept; + [[nodiscard]] ImmValue sqrt() const noexcept; + [[nodiscard]] ImmValue rsqrt() const noexcept; + [[nodiscard]] ImmValue sin() const noexcept; + [[nodiscard]] ImmValue cos() const noexcept; + [[nodiscard]] ImmValue exp2() const noexcept; + [[nodiscard]] ImmValue ldexp(const ImmValue& exp) const noexcept; + [[nodiscard]] ImmValue log2() const noexcept; + [[nodiscard]] ImmValue clamp(const ImmValue& min, const ImmValue& max) const noexcept; + [[nodiscard]] ImmValue floor() const noexcept; + [[nodiscard]] ImmValue ceil() const noexcept; + [[nodiscard]] ImmValue round() const noexcept; + [[nodiscard]] ImmValue trunc() const noexcept; + [[nodiscard]] ImmValue fract() const noexcept; + [[nodiscard]] bool isnan() const noexcept; + + [[nodiscard]] static ImmValue fma(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept; + + static bool IsSupportedValue(const IR::Value& value) noexcept; private: union Value { bool imm_u1; From 79f4648c77e346591feb4cdcc83777d07459e3ee Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 11 Mar 2025 17:37:50 +0100 Subject: [PATCH 18/49] Use correct types --- .../ir/compute_value/compute.cpp | 10 +- .../ir/compute_value/imm_value.cpp | 23 ++-- .../ir/compute_value/imm_value.h | 117 +++++++++--------- 3 files changed, 75 insertions(+), 75 deletions(-) diff --git a/src/shader_recompiler/ir/compute_value/compute.cpp b/src/shader_recompiler/ir/compute_value/compute.cpp index 14e7518f4..0f1707981 100644 --- a/src/shader_recompiler/ir/compute_value/compute.cpp +++ b/src/shader_recompiler/ir/compute_value/compute.cpp @@ -89,7 +89,7 @@ static void OperationFma(Inst* inst, ImmValueList& inst_values, ComputeImmValues ComputeImmValues(inst->Arg(2), args2, cache); const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c) { - return ImmValue::fma(a, b, c); + return ImmValue::fma(ImmF32F64(a), ImmF32F64(b), ImmF32F64(c)); }; 
CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, args2); @@ -242,7 +242,7 @@ static void OperationLdexp(Inst* inst, ImmValueList& inst_values, ComputeImmValu ComputeImmValues(inst->Arg(1), args1, cache); const auto op = [](const ImmValue& a, const ImmValue& b) { - return a.ldexp(b); + return a.ldexp(ImmU32(b)); }; CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); @@ -346,7 +346,7 @@ static void OperationShiftLeft(Inst* inst, ImmValueList& inst_values, ComputeImm ComputeImmValues(inst->Arg(1), args1, cache); const auto op = [](const ImmValue& a, const ImmValue& b) { - return a << b; + return a << ImmU32(b); }; SetSigned(args1, false); @@ -359,7 +359,7 @@ static void OperationShiftRight(Inst* inst, bool is_signed, ImmValueList& inst_v ComputeImmValues(inst->Arg(1), args1, cache); const auto op = [](const ImmValue& a, const ImmValue& b) { - return a >> b; + return a >> ImmU32(b); }; SetSigned(args0, is_signed); @@ -460,7 +460,7 @@ static void OperationCompositeExtract(Inst* inst, ImmValueList& inst_values, Com ComputeImmValues(inst->Arg(1), args1, cache); const auto op = [](const ImmValue& a, const ImmValue& b) { - return a.Extract(b); + return a.Extract(ImmU32(b)); }; SetSigned(args1, false); diff --git a/src/shader_recompiler/ir/compute_value/imm_value.cpp b/src/shader_recompiler/ir/compute_value/imm_value.cpp index 6c433c244..068069d2e 100644 --- a/src/shader_recompiler/ir/compute_value/imm_value.cpp +++ b/src/shader_recompiler/ir/compute_value/imm_value.cpp @@ -345,8 +345,8 @@ ImmValue ImmValue::Bitcast(IR::Type new_type, bool new_signed) const noexcept { return result; } -ImmValue ImmValue::Extract(const ImmValue& index) const noexcept { - ASSERT(index.type == Type::U32 && !index.is_signed && index.imm_values[0].imm_u32 < Dimensions()); +ImmValue ImmValue::Extract(const ImmU32& index) const noexcept { + ASSERT(index.imm_values[0].imm_u32 < Dimensions()); ImmValue result; result.type = BaseType(); result.is_signed = IsSigned(); @@ -354,8 +354,8 @@ ImmValue ImmValue::Extract(const ImmValue& index) const noexcept { return result; } -ImmValue ImmValue::Insert(const ImmValue& value, const ImmValue& index) const noexcept { - ASSERT(index.type == Type::U32 && !index.is_signed && index.imm_values[0].imm_u32 < Dimensions()); +ImmValue ImmValue::Insert(const ImmValue& value, const ImmU32& index) const noexcept { + ASSERT(index.imm_values[0].imm_u32 < Dimensions()); ASSERT(value.type == BaseType() && value.IsSigned() == IsSigned()); ImmValue result = *this; result.imm_values[index.imm_values[0].imm_u32] = value.imm_values[0]; @@ -879,8 +879,7 @@ ImmValue ImmValue::operator^(const ImmValue& other) const noexcept { } } -ImmValue ImmValue::operator<<(const ImmValue& other) const noexcept { - ASSERT(other.type == Type::U32 && other.Dimensions() == 1); +ImmValue ImmValue::operator<<(const ImmU32& other) const noexcept { switch (type) { case Type::U1: return ImmValue(imm_values[0].imm_u1 << other.imm_values[0].imm_u1); @@ -905,8 +904,7 @@ ImmValue ImmValue::operator<<(const ImmValue& other) const noexcept { } } -ImmValue ImmValue::operator>>(const ImmValue& other) const noexcept { - ASSERT(other.type == Type::U32 && other.Dimensions() == 1); +ImmValue ImmValue::operator>>(const ImmU32& other) const noexcept { switch (type) { case Type::U1: return ImmValue(imm_values[0].imm_u1 >> other.imm_values[0].imm_u1); @@ -1170,13 +1168,13 @@ ImmValue& ImmValue::operator^=(const ImmValue& other) noexcept { return *this; } -ImmValue& 
ImmValue::operator<<=(const ImmValue& other) noexcept { +ImmValue& ImmValue::operator<<=(const ImmU32& other) noexcept { ImmValue result = *this << other; *this = result; return *this; } -ImmValue& ImmValue::operator>>=(const ImmValue& other) noexcept { +ImmValue& ImmValue::operator>>=(const ImmU32& other) noexcept { ImmValue result = *this >> other; *this = result; return *this; @@ -1271,8 +1269,7 @@ ImmValue ImmValue::exp2() const noexcept { } } -ImmValue ImmValue::ldexp(const ImmValue& exp) const noexcept { - ASSERT(type == exp.type); +ImmValue ImmValue::ldexp(const ImmU32& exp) const noexcept { switch (type) { case Type::F32: return ImmValue(std::ldexp(imm_values[0].imm_f32, exp.imm_values[0].imm_s32)); @@ -1414,7 +1411,7 @@ bool ImmValue::isnan() const noexcept { } } -ImmValue ImmValue::fma(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept { +ImmValue ImmValue::fma(const ImmF32F64& a, const ImmF32F64& b, const ImmF32F64& c) noexcept { ASSERT(a.type == b.type && b.type == c.type); switch (a.type) { case Type::F32: diff --git a/src/shader_recompiler/ir/compute_value/imm_value.h b/src/shader_recompiler/ir/compute_value/imm_value.h index 78696d83a..017bd339d 100644 --- a/src/shader_recompiler/ir/compute_value/imm_value.h +++ b/src/shader_recompiler/ir/compute_value/imm_value.h @@ -16,6 +16,58 @@ namespace Shader::IR { // Live IR::Value but can only hold immediate values. Additionally, can hold vectors of values. // Has arithmetic operations defined for it. Usefull for computing a value at shader compile time. +template +class TypedImmValue; + +using ImmU1 = TypedImmValue; +using ImmU8 = TypedImmValue; +using ImmS8 = TypedImmValue; +using ImmU16 = TypedImmValue; +using ImmS16 = TypedImmValue; +using ImmU32 = TypedImmValue; +using ImmS32 = TypedImmValue; +using ImmF32 = TypedImmValue; +using ImmU64 = TypedImmValue; +using ImmS64 = TypedImmValue; +using ImmF64 = TypedImmValue; +using ImmS32F32 = TypedImmValue; +using ImmS64F64 = TypedImmValue; +using ImmU32U64 = TypedImmValue; +using ImmS32S64 = TypedImmValue; +using ImmU16U32U64 = TypedImmValue; +using ImmS16S32S64 = TypedImmValue; +using ImmF32F64 = TypedImmValue; +using ImmUAny = TypedImmValue; +using ImmSAny = TypedImmValue; +using ImmU32x2 = TypedImmValue; +using ImmU32x3 = TypedImmValue; +using ImmU32x4 = TypedImmValue; +using ImmS32x2 = TypedImmValue; +using ImmS32x3 = TypedImmValue; +using ImmS32x4 = TypedImmValue; +using ImmF32x2 = TypedImmValue; +using ImmF32x3 = TypedImmValue; +using ImmF32x4 = TypedImmValue; +using ImmF64x2 = TypedImmValue; +using ImmF64x3 = TypedImmValue; +using ImmF64x4 = TypedImmValue; +using ImmS32F32x2 = TypedImmValue; +using ImmS32F32x3 = TypedImmValue; +using ImmS32F32x4 = TypedImmValue; +using ImmF32F64x2 = TypedImmValue; +using ImmF32F64x3 = TypedImmValue; +using ImmF32F64x4 = TypedImmValue; +using ImmU32xAny = TypedImmValue; +using ImmS32xAny = TypedImmValue; +using ImmF32xAny = TypedImmValue; +using ImmF64xAny = TypedImmValue; +using ImmS32F32xAny = TypedImmValue; +using ImmF32F64xAny = TypedImmValue; + class ImmValue { public: ImmValue() noexcept = default; @@ -59,8 +111,8 @@ public: [[nodiscard]] ImmValue Convert(IR::Type new_type, bool new_signed) const noexcept; [[nodiscard]] ImmValue Bitcast(IR::Type new_type, bool new_signed) const noexcept; - [[nodiscard]] ImmValue Extract(const ImmValue& index) const noexcept; - [[nodiscard]] ImmValue Insert(const ImmValue& value, const ImmValue& index) const noexcept; + [[nodiscard]] ImmValue Extract(const ImmU32& index) const noexcept; + 
[[nodiscard]] ImmValue Insert(const ImmValue& value, const ImmU32& index) const noexcept; [[nodiscard]] bool U1() const; [[nodiscard]] u8 U8() const; @@ -104,8 +156,8 @@ public: [[nodiscard]] ImmValue operator&(const ImmValue& other) const noexcept; [[nodiscard]] ImmValue operator|(const ImmValue& other) const noexcept; [[nodiscard]] ImmValue operator^(const ImmValue& other) const noexcept; - [[nodiscard]] ImmValue operator<<(const ImmValue& other) const noexcept; - [[nodiscard]] ImmValue operator>>(const ImmValue& other) const noexcept; + [[nodiscard]] ImmValue operator<<(const ImmU32& other) const noexcept; + [[nodiscard]] ImmValue operator>>(const ImmU32& other) const noexcept; [[nodiscard]] ImmValue operator~() const noexcept; [[nodiscard]] ImmValue operator++(int) noexcept; @@ -125,8 +177,8 @@ public: ImmValue& operator&=(const ImmValue& other) noexcept; ImmValue& operator|=(const ImmValue& other) noexcept; ImmValue& operator^=(const ImmValue& other) noexcept; - ImmValue& operator<<=(const ImmValue& other) noexcept; - ImmValue& operator>>=(const ImmValue& other) noexcept; + ImmValue& operator<<=(const ImmU32& other) noexcept; + ImmValue& operator>>=(const ImmU32& other) noexcept; [[nodiscard]] ImmValue abs() const noexcept; [[nodiscard]] ImmValue recip() const noexcept; @@ -135,7 +187,7 @@ public: [[nodiscard]] ImmValue sin() const noexcept; [[nodiscard]] ImmValue cos() const noexcept; [[nodiscard]] ImmValue exp2() const noexcept; - [[nodiscard]] ImmValue ldexp(const ImmValue& exp) const noexcept; + [[nodiscard]] ImmValue ldexp(const ImmU32& exp) const noexcept; [[nodiscard]] ImmValue log2() const noexcept; [[nodiscard]] ImmValue clamp(const ImmValue& min, const ImmValue& max) const noexcept; [[nodiscard]] ImmValue floor() const noexcept; @@ -145,7 +197,7 @@ public: [[nodiscard]] ImmValue fract() const noexcept; [[nodiscard]] bool isnan() const noexcept; - [[nodiscard]] static ImmValue fma(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept; + [[nodiscard]] static ImmValue fma(const ImmF32F64& a, const ImmF32F64& b, const ImmF32F64& c) noexcept; static bool IsSupportedValue(const IR::Value& value) noexcept; private: @@ -193,55 +245,6 @@ public: } }; -using ImmU1 = TypedImmValue; -using ImmU8 = TypedImmValue; -using ImmS8 = TypedImmValue; -using ImmU16 = TypedImmValue; -using ImmS16 = TypedImmValue; -using ImmU32 = TypedImmValue; -using ImmS32 = TypedImmValue; -using ImmF32 = TypedImmValue; -using ImmU64 = TypedImmValue; -using ImmS64 = TypedImmValue; -using ImmF64 = TypedImmValue; -using ImmS32F32 = TypedImmValue; -using ImmS64F64 = TypedImmValue; -using ImmU32U64 = TypedImmValue; -using ImmS32S64 = TypedImmValue; -using ImmU16U32U64 = TypedImmValue; -using ImmS16S32S64 = TypedImmValue; -using ImmF32F64 = TypedImmValue; -using ImmUAny = TypedImmValue; -using ImmSAny = TypedImmValue; -using ImmU32x2 = TypedImmValue; -using ImmU32x3 = TypedImmValue; -using ImmU32x4 = TypedImmValue; -using ImmS32x2 = TypedImmValue; -using ImmS32x3 = TypedImmValue; -using ImmS32x4 = TypedImmValue; -using ImmF32x2 = TypedImmValue; -using ImmF32x3 = TypedImmValue; -using ImmF32x4 = TypedImmValue; -using ImmF64x2 = TypedImmValue; -using ImmF64x3 = TypedImmValue; -using ImmF64x4 = TypedImmValue; -using ImmS32F32x2 = TypedImmValue; -using ImmS32F32x3 = TypedImmValue; -using ImmS32F32x4 = TypedImmValue; -using ImmF32F64x2 = TypedImmValue; -using ImmF32F64x3 = TypedImmValue; -using ImmF32F64x4 = TypedImmValue; -using ImmU32xAny = TypedImmValue; -using ImmS32xAny = TypedImmValue; -using ImmF32xAny
= TypedImmValue; -using ImmF64xAny = TypedImmValue; -using ImmS32F32xAny = TypedImmValue; -using ImmF32F64xAny = TypedImmValue; - inline bool ImmValue::IsEmpty() const noexcept { return type == Type::Void; } From 85b44af27033a1434f95a61ec8888ad39c967868 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 11 Mar 2025 23:22:31 +0100 Subject: [PATCH 19/49] Operation codes --- .../ir/compute_value/compute.cpp | 185 +++++++++++++++++- 1 file changed, 184 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/ir/compute_value/compute.cpp b/src/shader_recompiler/ir/compute_value/compute.cpp index 0f1707981..829de9d72 100644 --- a/src/shader_recompiler/ir/compute_value/compute.cpp +++ b/src/shader_recompiler/ir/compute_value/compute.cpp @@ -467,13 +467,193 @@ static void OperationCompositeExtract(Inst* inst, ImmValueList& inst_values, Com CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); } +static void OperationInsert(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { + ImmValueList args0, args1, args2; + ComputeImmValues(inst->Arg(0), args0, cache); + ComputeImmValues(inst->Arg(1), args1, cache); + ComputeImmValues(inst->Arg(2), args2, cache); + + const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c) { + return a.Insert(b, ImmU32(c)); + }; + + SetSigned(args2, false); + CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, args2); +} + static void DoInstructionOperation(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { switch (inst->GetOpcode()) { + case Opcode::CompositeConstructU32x2: + case Opcode::CompositeConstructU32x2x2: + case Opcode::CompositeConstructF16x2: + case Opcode::CompositeConstructF32x2: + case Opcode::CompositeConstructF32x2x2: + case Opcode::CompositeConstructF64x2: + OperationCompositeConstruct<2>(inst, inst_values, cache); + break; + case Opcode::CompositeConstructU32x3: + case Opcode::CompositeConstructF16x3: + case Opcode::CompositeConstructF32x3: + case Opcode::CompositeConstructF64x3: + OperationCompositeConstruct<3>(inst, inst_values, cache); + break; + case Opcode::CompositeConstructU32x4: + case Opcode::CompositeConstructF16x4: + case Opcode::CompositeConstructF32x4: + case Opcode::CompositeConstructF64x4: + OperationCompositeConstruct<4>(inst, inst_values, cache); + break; + case Opcode::CompositeExtractU32x2: + case Opcode::CompositeExtractU32x3: + case Opcode::CompositeExtractU32x4: + case Opcode::CompositeExtractF16x2: + case Opcode::CompositeExtractF16x3: + case Opcode::CompositeExtractF16x4: + case Opcode::CompositeExtractF32x2: + case Opcode::CompositeExtractF32x3: + case Opcode::CompositeExtractF32x4: + case Opcode::CompositeExtractF64x2: + case Opcode::CompositeExtractF64x3: + case Opcode::CompositeExtractF64x4: + OperationCompositeExtract(inst, inst_values, cache); + break; + case Opcode::CompositeInsertU32x2: + case Opcode::CompositeInsertU32x3: + case Opcode::CompositeInsertU32x4: + case Opcode::CompositeInsertF16x2: + case Opcode::CompositeInsertF16x3: + case Opcode::CompositeInsertF16x4: + case Opcode::CompositeInsertF32x2: + case Opcode::CompositeInsertF32x3: + case Opcode::CompositeInsertF32x4: + case Opcode::CompositeInsertF64x2: + case Opcode::CompositeInsertF64x3: + case Opcode::CompositeInsertF64x4: + OperationInsert(inst, inst_values, cache); + break; + case Opcode::BitCastU16F16: + OperationBitCast(inst, IR::Type::U16, false, inst_values, cache); + break; + case Opcode::BitCastU32F32: + 
OperationBitCast(inst, IR::Type::U32, false, inst_values, cache); + break; + case Opcode::BitCastU64F64: + OperationBitCast(inst, IR::Type::U64, false, inst_values, cache); + break; + case Opcode::BitCastF16U16: + OperationBitCast(inst, IR::Type::F16, true, inst_values, cache); + break; + case Opcode::BitCastF32U32: + OperationBitCast(inst, IR::Type::F32, true, inst_values, cache); + break; + case Opcode::BitCastF64U64: + OperationBitCast(inst, IR::Type::F64, true, inst_values, cache); + break; + case Opcode::FPAbs32: + case Opcode::FPAbs64: + OperationAbs(inst, inst_values, cache); + break; + case Opcode::FPAdd32: + case Opcode::FPAdd64: + OperationAdd(inst, false, inst_values, cache); + break; + case Opcode::FPSub32: + OperationSub(inst, false, inst_values, cache); + break; + case Opcode::FPMul32: + case Opcode::FPMul64: + OperationMul(inst, false, inst_values, cache); + break; + case Opcode::FPDiv32: + case Opcode::FPDiv64: + OperationDiv(inst, false, inst_values, cache); + break; + case Opcode::FPFma32: + case Opcode::FPFma64: + OperationFma(inst, inst_values, cache); + break; + case Opcode::FPMin32: + case Opcode::FPMin64: + OperationMin(inst, false, inst_values, cache); + break; + case Opcode::FPMax32: + case Opcode::FPMax64: + OperationMax(inst, false, inst_values, cache); + break; + case Opcode::FPNeg32: + case Opcode::FPNeg64: + OperationNeg(inst, inst_values, cache); + break; + case Opcode::FPRecip32: + case Opcode::FPRecip64: + OperationRecip(inst, inst_values, cache); + break; + case Opcode::FPRecipSqrt32: + case Opcode::FPRecipSqrt64: + OperationRecipSqrt(inst, inst_values, cache); + break; + case Opcode::FPSqrt: + OperationSqrt(inst, inst_values, cache); + break; + case Opcode::FPSin: + OperationSin(inst, inst_values, cache); + break; + case Opcode::FPCos: + OperationCos(inst, inst_values, cache); + break; + case Opcode::FPExp2: + OperationExp2(inst, inst_values, cache); + break; + case Opcode::FPLdexp: + OperationLdexp(inst, inst_values, cache); + break; + case Opcode::FPLog2: + OperationLog2(inst, inst_values, cache); + break; + case Opcode::FPClamp32: + case Opcode::FPClamp64: + OperationClamp(inst, false, inst_values, cache); + break; + case Opcode::FPRoundEven32: + case Opcode::FPRoundEven64: + OperationRound(inst, inst_values, cache); + break; + case Opcode::FPFloor32: + case Opcode::FPFloor64: + OperationFloor(inst, inst_values, cache); + break; + case Opcode::FPCeil32: + case Opcode::FPCeil64: + OperationCeil(inst, inst_values, cache); + break; + case Opcode::FPTrunc32: + case Opcode::FPTrunc64: + OperationTrunc(inst, inst_values, cache); + break; + case Opcode::FPFract32: + case Opcode::FPFract64: + OperationFract(inst, inst_values, cache); + break; default: break; } } +static bool IsSelectInst(Inst* inst) { + switch (inst->GetOpcode()) { + case Opcode::SelectU1: + case Opcode::SelectU8: + case Opcode::SelectU16: + case Opcode::SelectU32: + case Opcode::SelectU64: + case Opcode::SelectF32: + case Opcode::SelectF64: + return true; + default: + return false; + } +} + void ComputeImmValues(const Value& value, ImmValueList& values, ComputeImmValuesCache& cache) { Value resolved = value.Resolve(); if (ImmValue::IsSupportedValue(resolved)) { @@ -494,8 +674,11 @@ void ComputeImmValues(const Value& value, ImmValueList& values, ComputeImmValues for (size_t i = 0; i < inst->NumArgs(); ++i) { ComputeImmValues(inst->Arg(i), inst_values, cache); } + } if (IsSelectInst(inst)) { + ComputeImmValues(inst->Arg(1), inst_values, cache); + ComputeImmValues(inst->Arg(2), inst_values, 
cache); } else { - + DoInstructionOperation(inst, inst_values, cache); } values.insert(inst_values.begin(), inst_values.end()); } From 21aeccad36bb0b489971cf03639920af5343a653 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 12 Mar 2025 20:20:53 +0100 Subject: [PATCH 20/49] Typo --- src/shader_recompiler/ir/compute_value/imm_value.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shader_recompiler/ir/compute_value/imm_value.h b/src/shader_recompiler/ir/compute_value/imm_value.h index 017bd339d..9ffefc382 100644 --- a/src/shader_recompiler/ir/compute_value/imm_value.h +++ b/src/shader_recompiler/ir/compute_value/imm_value.h @@ -13,7 +13,7 @@ namespace Shader::IR { -// Live IR::Value but can only hold immediate values. Additionally, can hold vectors of values. +// Like IR::Value but can only hold immediate values. Additionally, can hold vectors of values. // Has arithmetic operations defined for it. Usefull for computing a value at shader compile time. template From 47cc2752030043d0a0ad02bd3bab2c7fdd102142 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 12 Mar 2025 21:02:12 +0100 Subject: [PATCH 21/49] Compute value --- .../ir/compute_value/compute.cpp | 634 ++++++++++-------- .../ir/compute_value/compute.h | 6 +- .../ir/compute_value/imm_value.cpp | 65 +- .../ir/compute_value/imm_value.h | 15 +- 4 files changed, 403 insertions(+), 317 deletions(-) diff --git a/src/shader_recompiler/ir/compute_value/compute.cpp b/src/shader_recompiler/ir/compute_value/compute.cpp index 829de9d72..7ba40d5bb 100644 --- a/src/shader_recompiler/ir/compute_value/compute.cpp +++ b/src/shader_recompiler/ir/compute_value/compute.cpp @@ -11,8 +11,8 @@ namespace Shader::IR { template static void CartesianInvokeImpl(Func func, OutputIt out_it, - std::tuple& arglists_its, - const std::tuple& arglists_tuple) { + std::tuple& arglists_its, + const std::tuple& arglists_tuple) { if constexpr (Level == N) { auto get_tuple = [&](std::index_sequence) { return std::forward_as_tuple(*std::get(arglists_its)...); @@ -23,7 +23,8 @@ static void CartesianInvokeImpl(Func func, OutputIt out_it, const auto& arglist = std::get(arglists_tuple); for (auto it = arglist.begin(); it != arglist.end(); ++it) { std::get(arglists_its) = it; - CartesianInvokeImpl(func, out_it, arglists_its, arglists_tuple); + CartesianInvokeImpl( + func, out_it, arglists_its, arglists_tuple); } } } @@ -34,7 +35,8 @@ static void CartesianInvoke(Func func, OutputIt out_it, const ArgLists&... 
arg_l const std::tuple arglists_tuple = std::forward_as_tuple(arg_lists...); std::tuple arglists_it; - CartesianInvokeImpl(func, out_it, arglists_it, arglists_tuple); + CartesianInvokeImpl(func, out_it, arglists_it, + arglists_tuple); } static void SetSigned(ImmValueList& values, bool is_signed) { @@ -47,35 +49,31 @@ static void OperationAbs(Inst* inst, ImmValueList& inst_values, ComputeImmValues ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.abs(); - }; + const auto op = [](const ImmValue& a) { return a.abs(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } -static void OperationAdd(Inst* inst, bool is_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationAdd(Inst* inst, bool is_signed, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); - const auto op = [](const ImmValue& a, const ImmValue& b) { - return a + b; - }; + const auto op = [](const ImmValue& a, const ImmValue& b) { return a + b; }; SetSigned(args0, is_signed); SetSigned(args1, is_signed); CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); } -static void OperationSub(Inst* inst, bool is_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationSub(Inst* inst, bool is_signed, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); - const auto op = [](const ImmValue& a, const ImmValue& b) { - return a - b; - }; + const auto op = [](const ImmValue& a, const ImmValue& b) { return a - b; }; SetSigned(args0, is_signed); SetSigned(args1, is_signed); @@ -95,7 +93,8 @@ static void OperationFma(Inst* inst, ImmValueList& inst_values, ComputeImmValues CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, args2); } -static void OperationMin(Inst* inst, bool is_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationMin(Inst* inst, bool is_signed, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1, is_legacy_args; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); @@ -107,18 +106,22 @@ static void OperationMin(Inst* inst, bool is_signed, ImmValueList& inst_values, const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& is_legacy) { if (is_legacy.U1()) { - if (a.isnan()) return b; - if (b.isnan()) return a; + if (a.isnan()) + return b; + if (b.isnan()) + return a; } return std::min(a, b); }; SetSigned(args0, is_signed); SetSigned(args1, is_signed); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, is_legacy_args); + CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, + is_legacy_args); } -static void OperationMax(Inst* inst, bool is_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationMax(Inst* inst, bool is_signed, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1, is_legacy_args; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); @@ -130,39 +133,53 @@ static void OperationMax(Inst* inst, bool is_signed, ImmValueList& inst_values, const auto op = 
[](const ImmValue& a, const ImmValue& b, const ImmValue& is_legacy) { if (is_legacy.U1()) { - if (a.isnan()) return b; - if (b.isnan()) return a; + if (a.isnan()) + return b; + if (b.isnan()) + return a; } return std::max(a, b); }; SetSigned(args0, is_signed); SetSigned(args1, is_signed); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, is_legacy_args); + CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, + is_legacy_args); } -static void OperationMul(Inst* inst, bool is_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationMul(Inst* inst, bool is_signed, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); - const auto op = [](const ImmValue& a, const ImmValue& b) { - return a * b; - }; + const auto op = [](const ImmValue& a, const ImmValue& b) { return a * b; }; SetSigned(args0, is_signed); SetSigned(args1, is_signed); CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); } -static void OperationDiv(Inst* inst, bool is_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationDiv(Inst* inst, bool is_signed, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); - const auto op = [](const ImmValue& a, const ImmValue& b) { - return a / b; - }; + const auto op = [](const ImmValue& a, const ImmValue& b) { return a / b; }; + + SetSigned(args0, is_signed); + SetSigned(args1, is_signed); + CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); +} + +static void OperationMod(Inst* inst, bool is_signed, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { + ImmValueList args0, args1; + ComputeImmValues(inst->Arg(0), args0, cache); + ComputeImmValues(inst->Arg(1), args1, cache); + + const auto op = [](const ImmValue& a, const ImmValue& b) { return a % b; }; SetSigned(args0, is_signed); SetSigned(args1, is_signed); @@ -173,9 +190,7 @@ static void OperationNeg(Inst* inst, ImmValueList& inst_values, ComputeImmValues ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return -a; - }; + const auto op = [](const ImmValue& a) { return -a; }; SetSigned(args, true); std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); @@ -185,20 +200,17 @@ static void OperationRecip(Inst* inst, ImmValueList& inst_values, ComputeImmValu ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.recip(); - }; + const auto op = [](const ImmValue& a) { return a.recip(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } -static void OperationRecipSqrt(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationRecipSqrt(Inst* inst, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.rsqrt(); - }; + const auto op = [](const ImmValue& a) { return a.rsqrt(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } @@ -207,9 +219,7 @@ static void OperationSqrt(Inst* inst, ImmValueList& 
inst_values, ComputeImmValue ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.sqrt(); - }; + const auto op = [](const ImmValue& a) { return a.sqrt(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } @@ -218,9 +228,7 @@ static void OperationSin(Inst* inst, ImmValueList& inst_values, ComputeImmValues ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.sin(); - }; + const auto op = [](const ImmValue& a) { return a.sin(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } @@ -229,9 +237,7 @@ static void OperationExp2(Inst* inst, ImmValueList& inst_values, ComputeImmValue ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.exp2(); - }; + const auto op = [](const ImmValue& a) { return a.exp2(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } @@ -241,9 +247,7 @@ static void OperationLdexp(Inst* inst, ImmValueList& inst_values, ComputeImmValu ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); - const auto op = [](const ImmValue& a, const ImmValue& b) { - return a.ldexp(ImmU32(b)); - }; + const auto op = [](const ImmValue& a, const ImmValue& b) { return a.ldexp(ImmU32(b)); }; CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); } @@ -252,9 +256,7 @@ static void OperationCos(Inst* inst, ImmValueList& inst_values, ComputeImmValues ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.cos(); - }; + const auto op = [](const ImmValue& a) { return a.cos(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } @@ -263,14 +265,13 @@ static void OperationLog2(Inst* inst, ImmValueList& inst_values, ComputeImmValue ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.log2(); - }; + const auto op = [](const ImmValue& a) { return a.log2(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } -static void OperationClamp(Inst* inst, bool is_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationClamp(Inst* inst, bool is_signed, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1, args2; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); @@ -290,9 +291,7 @@ static void OperationRound(Inst* inst, ImmValueList& inst_values, ComputeImmValu ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.round(); - }; + const auto op = [](const ImmValue& a) { return a.round(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } @@ -301,9 +300,7 @@ static void OperationFloor(Inst* inst, ImmValueList& inst_values, ComputeImmValu ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.floor(); - }; + const auto op = [](const ImmValue& a) { return a.floor(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } @@ -312,9 +309,7 @@ static void OperationCeil(Inst* inst, ImmValueList& 
inst_values, ComputeImmValue ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.ceil(); - }; + const auto op = [](const ImmValue& a) { return a.ceil(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } @@ -323,9 +318,7 @@ static void OperationTrunc(Inst* inst, ImmValueList& inst_values, ComputeImmValu ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.trunc(); - }; + const auto op = [](const ImmValue& a) { return a.trunc(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } @@ -334,87 +327,80 @@ static void OperationFract(Inst* inst, ImmValueList& inst_values, ComputeImmValu ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return a.fract(); - }; + const auto op = [](const ImmValue& a) { return a.fract(); }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } -static void OperationShiftLeft(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationShiftLeft(Inst* inst, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); - const auto op = [](const ImmValue& a, const ImmValue& b) { - return a << ImmU32(b); - }; + const auto op = [](const ImmValue& a, const ImmValue& b) { return a << ImmU32(b); }; SetSigned(args1, false); CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); } -static void OperationShiftRight(Inst* inst, bool is_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationShiftRight(Inst* inst, bool is_signed, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); - const auto op = [](const ImmValue& a, const ImmValue& b) { - return a >> ImmU32(b); - }; + const auto op = [](const ImmValue& a, const ImmValue& b) { return a >> ImmU32(b); }; SetSigned(args0, is_signed); SetSigned(args1, false); CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); } -static void OperationBitwiseNot(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationBitwiseNot(Inst* inst, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); - const auto op = [](const ImmValue& a) { - return ~a; - }; + const auto op = [](const ImmValue& a) { return ~a; }; std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } -static void OperationBitwiseAnd(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationBitwiseAnd(Inst* inst, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); - const auto op = [](const ImmValue& a, const ImmValue& b) { - return a & b; - }; + const auto op = [](const ImmValue& a, const ImmValue& b) { return a & b; }; CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); } -static void OperationBitwiseOr(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& 
cache) { +static void OperationBitwiseOr(Inst* inst, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); - const auto op = [](const ImmValue& a, const ImmValue& b) { - return a | b; - }; + const auto op = [](const ImmValue& a, const ImmValue& b) { return a | b; }; CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); } -static void OperationBitwiseXor(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationBitwiseXor(Inst* inst, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); - const auto op = [](const ImmValue& a, const ImmValue& b) { - return a ^ b; - }; + const auto op = [](const ImmValue& a, const ImmValue& b) { return a ^ b; }; CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); } -static void OperationConvert(Inst* inst, bool is_signed, Type new_type, bool new_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationConvert(Inst* inst, bool is_signed, Type new_type, bool new_signed, + ImmValueList& inst_values, ComputeImmValuesCache& cache) { ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); @@ -426,7 +412,8 @@ static void OperationConvert(Inst* inst, bool is_signed, Type new_type, bool new std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } -static void OperationBitCast(Inst* inst, Type new_type, bool new_signed, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationBitCast(Inst* inst, Type new_type, bool new_signed, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args; ComputeImmValues(inst->Arg(0), args, cache); @@ -437,16 +424,15 @@ static void OperationBitCast(Inst* inst, Type new_type, bool new_signed, ImmValu std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); } -template -static void OperationCompositeConstruct(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +template +static void OperationCompositeConstruct(Inst* inst, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { std::array args; for (size_t i = 0; i < N; ++i) { ComputeImmValues(inst->Arg(i), args[i], cache); } - const auto op = [](const Args&... args) { - return ImmValue(args...); - }; + const auto op = [](const Args&... 
args) { return ImmValue(args...); }; const auto call_cartesian = [&](std::index_sequence) { CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args[I]...); @@ -454,14 +440,13 @@ static void OperationCompositeConstruct(Inst* inst, ImmValueList& inst_values, C call_cartesian(std::make_index_sequence{}); } -static void OperationCompositeExtract(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void OperationCompositeExtract(Inst* inst, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { ImmValueList args0, args1; ComputeImmValues(inst->Arg(0), args0, cache); ComputeImmValues(inst->Arg(1), args1, cache); - const auto op = [](const ImmValue& a, const ImmValue& b) { - return a.Extract(ImmU32(b)); - }; + const auto op = [](const ImmValue& a, const ImmValue& b) { return a.Extract(ImmU32(b)); }; SetSigned(args1, false); CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); @@ -481,176 +466,274 @@ static void OperationInsert(Inst* inst, ImmValueList& inst_values, ComputeImmVal CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, args2); } -static void DoInstructionOperation(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void DoInstructionOperation(Inst* inst, ImmValueList& inst_values, + ComputeImmValuesCache& cache) { switch (inst->GetOpcode()) { - case Opcode::CompositeConstructU32x2: - case Opcode::CompositeConstructU32x2x2: - case Opcode::CompositeConstructF16x2: - case Opcode::CompositeConstructF32x2: - case Opcode::CompositeConstructF32x2x2: - case Opcode::CompositeConstructF64x2: - OperationCompositeConstruct<2>(inst, inst_values, cache); - break; - case Opcode::CompositeConstructU32x3: - case Opcode::CompositeConstructF16x3: - case Opcode::CompositeConstructF32x3: - case Opcode::CompositeConstructF64x3: - OperationCompositeConstruct<3>(inst, inst_values, cache); - break; - case Opcode::CompositeConstructU32x4: - case Opcode::CompositeConstructF16x4: - case Opcode::CompositeConstructF32x4: - case Opcode::CompositeConstructF64x4: - OperationCompositeConstruct<4>(inst, inst_values, cache); - break; - case Opcode::CompositeExtractU32x2: - case Opcode::CompositeExtractU32x3: - case Opcode::CompositeExtractU32x4: - case Opcode::CompositeExtractF16x2: - case Opcode::CompositeExtractF16x3: - case Opcode::CompositeExtractF16x4: - case Opcode::CompositeExtractF32x2: - case Opcode::CompositeExtractF32x3: - case Opcode::CompositeExtractF32x4: - case Opcode::CompositeExtractF64x2: - case Opcode::CompositeExtractF64x3: - case Opcode::CompositeExtractF64x4: - OperationCompositeExtract(inst, inst_values, cache); - break; - case Opcode::CompositeInsertU32x2: - case Opcode::CompositeInsertU32x3: - case Opcode::CompositeInsertU32x4: - case Opcode::CompositeInsertF16x2: - case Opcode::CompositeInsertF16x3: - case Opcode::CompositeInsertF16x4: - case Opcode::CompositeInsertF32x2: - case Opcode::CompositeInsertF32x3: - case Opcode::CompositeInsertF32x4: - case Opcode::CompositeInsertF64x2: - case Opcode::CompositeInsertF64x3: - case Opcode::CompositeInsertF64x4: - OperationInsert(inst, inst_values, cache); - break; - case Opcode::BitCastU16F16: - OperationBitCast(inst, IR::Type::U16, false, inst_values, cache); - break; - case Opcode::BitCastU32F32: - OperationBitCast(inst, IR::Type::U32, false, inst_values, cache); - break; - case Opcode::BitCastU64F64: - OperationBitCast(inst, IR::Type::U64, false, inst_values, cache); - break; - case Opcode::BitCastF16U16: - 
OperationBitCast(inst, IR::Type::F16, true, inst_values, cache); - break; - case Opcode::BitCastF32U32: - OperationBitCast(inst, IR::Type::F32, true, inst_values, cache); - break; - case Opcode::BitCastF64U64: - OperationBitCast(inst, IR::Type::F64, true, inst_values, cache); - break; - case Opcode::FPAbs32: - case Opcode::FPAbs64: - OperationAbs(inst, inst_values, cache); - break; - case Opcode::FPAdd32: - case Opcode::FPAdd64: - OperationAdd(inst, false, inst_values, cache); - break; - case Opcode::FPSub32: - OperationSub(inst, false, inst_values, cache); - break; - case Opcode::FPMul32: - case Opcode::FPMul64: - OperationMul(inst, false, inst_values, cache); - break; - case Opcode::FPDiv32: - case Opcode::FPDiv64: - OperationDiv(inst, false, inst_values, cache); - break; - case Opcode::FPFma32: - case Opcode::FPFma64: - OperationFma(inst, inst_values, cache); - break; - case Opcode::FPMin32: - case Opcode::FPMin64: - OperationMin(inst, false, inst_values, cache); - break; - case Opcode::FPMax32: - case Opcode::FPMax64: - OperationMax(inst, false, inst_values, cache); - break; - case Opcode::FPNeg32: - case Opcode::FPNeg64: - OperationNeg(inst, inst_values, cache); - break; - case Opcode::FPRecip32: - case Opcode::FPRecip64: - OperationRecip(inst, inst_values, cache); - break; - case Opcode::FPRecipSqrt32: - case Opcode::FPRecipSqrt64: - OperationRecipSqrt(inst, inst_values, cache); - break; - case Opcode::FPSqrt: - OperationSqrt(inst, inst_values, cache); - break; - case Opcode::FPSin: - OperationSin(inst, inst_values, cache); - break; - case Opcode::FPCos: - OperationCos(inst, inst_values, cache); - break; - case Opcode::FPExp2: - OperationExp2(inst, inst_values, cache); - break; - case Opcode::FPLdexp: - OperationLdexp(inst, inst_values, cache); - break; - case Opcode::FPLog2: - OperationLog2(inst, inst_values, cache); - break; - case Opcode::FPClamp32: - case Opcode::FPClamp64: - OperationClamp(inst, false, inst_values, cache); - break; - case Opcode::FPRoundEven32: - case Opcode::FPRoundEven64: - OperationRound(inst, inst_values, cache); - break; - case Opcode::FPFloor32: - case Opcode::FPFloor64: - OperationFloor(inst, inst_values, cache); - break; - case Opcode::FPCeil32: - case Opcode::FPCeil64: - OperationCeil(inst, inst_values, cache); - break; - case Opcode::FPTrunc32: - case Opcode::FPTrunc64: - OperationTrunc(inst, inst_values, cache); - break; - case Opcode::FPFract32: - case Opcode::FPFract64: - OperationFract(inst, inst_values, cache); - break; - default: - break; + case Opcode::CompositeConstructU32x2: + case Opcode::CompositeConstructU32x2x2: + case Opcode::CompositeConstructF16x2: + case Opcode::CompositeConstructF32x2: + case Opcode::CompositeConstructF32x2x2: + case Opcode::CompositeConstructF64x2: + OperationCompositeConstruct<2>(inst, inst_values, cache); + break; + case Opcode::CompositeConstructU32x3: + case Opcode::CompositeConstructF16x3: + case Opcode::CompositeConstructF32x3: + case Opcode::CompositeConstructF64x3: + OperationCompositeConstruct<3>(inst, inst_values, cache); + break; + case Opcode::CompositeConstructU32x4: + case Opcode::CompositeConstructF16x4: + case Opcode::CompositeConstructF32x4: + case Opcode::CompositeConstructF64x4: + OperationCompositeConstruct<4>(inst, inst_values, cache); + break; + case Opcode::CompositeExtractU32x2: + case Opcode::CompositeExtractU32x3: + case Opcode::CompositeExtractU32x4: + case Opcode::CompositeExtractF16x2: + case Opcode::CompositeExtractF16x3: + case Opcode::CompositeExtractF16x4: + case 
Opcode::CompositeExtractF32x2: + case Opcode::CompositeExtractF32x3: + case Opcode::CompositeExtractF32x4: + case Opcode::CompositeExtractF64x2: + case Opcode::CompositeExtractF64x3: + case Opcode::CompositeExtractF64x4: + OperationCompositeExtract(inst, inst_values, cache); + break; + case Opcode::CompositeInsertU32x2: + case Opcode::CompositeInsertU32x3: + case Opcode::CompositeInsertU32x4: + case Opcode::CompositeInsertF16x2: + case Opcode::CompositeInsertF16x3: + case Opcode::CompositeInsertF16x4: + case Opcode::CompositeInsertF32x2: + case Opcode::CompositeInsertF32x3: + case Opcode::CompositeInsertF32x4: + case Opcode::CompositeInsertF64x2: + case Opcode::CompositeInsertF64x3: + case Opcode::CompositeInsertF64x4: + OperationInsert(inst, inst_values, cache); + break; + case Opcode::BitCastU16F16: + OperationBitCast(inst, IR::Type::U16, false, inst_values, cache); + break; + case Opcode::BitCastU32F32: + OperationBitCast(inst, IR::Type::U32, false, inst_values, cache); + break; + case Opcode::BitCastU64F64: + OperationBitCast(inst, IR::Type::U64, false, inst_values, cache); + break; + case Opcode::BitCastF16U16: + OperationBitCast(inst, IR::Type::F16, true, inst_values, cache); + break; + case Opcode::BitCastF32U32: + OperationBitCast(inst, IR::Type::F32, true, inst_values, cache); + break; + case Opcode::BitCastF64U64: + OperationBitCast(inst, IR::Type::F64, true, inst_values, cache); + break; + case Opcode::FPAbs32: + case Opcode::FPAbs64: + case Opcode::IAbs32: + OperationAbs(inst, inst_values, cache); + break; + case Opcode::FPAdd32: + case Opcode::FPAdd64: + OperationAdd(inst, true, inst_values, cache); + break; + case Opcode::IAdd32: + case Opcode::IAdd64: + OperationAdd(inst, false, inst_values, cache); + break; + case Opcode::FPSub32: + OperationSub(inst, true, inst_values, cache); + break; + case Opcode::ISub32: + case Opcode::ISub64: + OperationSub(inst, false, inst_values, cache); + break; + case Opcode::FPMul32: + case Opcode::FPMul64: + OperationMul(inst, true, inst_values, cache); + break; + case Opcode::IMul32: + case Opcode::IMul64: + OperationMul(inst, false, inst_values, cache); + break; + case Opcode::FPDiv32: + case Opcode::FPDiv64: + case Opcode::SDiv32: + OperationDiv(inst, true, inst_values, cache); + break; + case Opcode::UDiv32: + OperationDiv(inst, false, inst_values, cache); + break; + case Opcode::SMod32: + OperationMod(inst, true, inst_values, cache); + break; + case Opcode::UMod32: + OperationMod(inst, false, inst_values, cache); + break; + case Opcode::INeg32: + case Opcode::INeg64: + OperationNeg(inst, inst_values, cache); + break; + case Opcode::FPFma32: + case Opcode::FPFma64: + OperationFma(inst, inst_values, cache); + break; + case Opcode::FPMin32: + case Opcode::FPMin64: + case Opcode::SMin32: + OperationMin(inst, true, inst_values, cache); + break; + case Opcode::UMin32: + OperationMin(inst, false, inst_values, cache); + break; + case Opcode::FPMax32: + case Opcode::FPMax64: + case Opcode::SMax32: + OperationMax(inst, true, inst_values, cache); + break; + case Opcode::UMax32: + OperationMax(inst, false, inst_values, cache); + break; + case Opcode::FPNeg32: + case Opcode::FPNeg64: + OperationNeg(inst, inst_values, cache); + break; + case Opcode::FPRecip32: + case Opcode::FPRecip64: + OperationRecip(inst, inst_values, cache); + break; + case Opcode::FPRecipSqrt32: + case Opcode::FPRecipSqrt64: + OperationRecipSqrt(inst, inst_values, cache); + break; + case Opcode::FPSqrt: + OperationSqrt(inst, inst_values, cache); + break; + case Opcode::FPSin: + 
OperationSin(inst, inst_values, cache); + break; + case Opcode::FPCos: + OperationCos(inst, inst_values, cache); + break; + case Opcode::FPExp2: + OperationExp2(inst, inst_values, cache); + break; + case Opcode::FPLdexp: + OperationLdexp(inst, inst_values, cache); + break; + case Opcode::FPLog2: + OperationLog2(inst, inst_values, cache); + break; + case Opcode::FPClamp32: + case Opcode::FPClamp64: + case Opcode::SClamp32: + OperationClamp(inst, true, inst_values, cache); + break; + case Opcode::UClamp32: + OperationClamp(inst, false, inst_values, cache); + break; + case Opcode::FPRoundEven32: + case Opcode::FPRoundEven64: + OperationRound(inst, inst_values, cache); + break; + case Opcode::FPFloor32: + case Opcode::FPFloor64: + OperationFloor(inst, inst_values, cache); + break; + case Opcode::FPCeil32: + case Opcode::FPCeil64: + OperationCeil(inst, inst_values, cache); + break; + case Opcode::FPTrunc32: + case Opcode::FPTrunc64: + OperationTrunc(inst, inst_values, cache); + break; + case Opcode::FPFract32: + case Opcode::FPFract64: + OperationFract(inst, inst_values, cache); + break; + case Opcode::ShiftLeftLogical32: + case Opcode::ShiftLeftLogical64: + OperationShiftLeft(inst, inst_values, cache); + break; + case Opcode::ShiftRightLogical32: + case Opcode::ShiftRightLogical64: + OperationShiftRight(inst, false, inst_values, cache); + break; + case Opcode::ShiftRightArithmetic32: + case Opcode::ShiftRightArithmetic64: + OperationShiftRight(inst, true, inst_values, cache); + break; + case Opcode::BitwiseAnd32: + case Opcode::BitwiseAnd64: + case Opcode::LogicalAnd: + OperationBitwiseAnd(inst, inst_values, cache); + break; + case Opcode::BitwiseOr32: + case Opcode::BitwiseOr64: + case Opcode::LogicalOr: + OperationBitwiseOr(inst, inst_values, cache); + break; + case Opcode::BitwiseXor32: + case Opcode::LogicalXor: + OperationBitwiseXor(inst, inst_values, cache); + break; + case Opcode::BitwiseNot32: + case Opcode::LogicalNot: + OperationBitwiseNot(inst, inst_values, cache); + break; + case Opcode::ConvertU16U32: + OperationConvert(inst, false, Type::U16, false, inst_values, cache); + break; + case Opcode::ConvertS32F32: + case Opcode::ConvertS32F64: + OperationConvert(inst, true, Type::U32, true, inst_values, cache); + break; + case Opcode::ConvertU32F32: + OperationConvert(inst, true, Type::U32, false, inst_values, cache); + break; + case Opcode::ConvertU32U16: + OperationConvert(inst, false, Type::U32, false, inst_values, cache); + break; + case Opcode::ConvertF32F16: + case Opcode::ConvertF32F64: + case Opcode::ConvertF32S32: + OperationConvert(inst, true, Type::F32, true, inst_values, cache); + break; + case Opcode::ConvertF32U32: + OperationConvert(inst, false, Type::F32, true, inst_values, cache); + break; + case Opcode::ConvertF64F32: + case Opcode::ConvertF64S32: + OperationConvert(inst, true, Type::F64, true, inst_values, cache); + break; + case Opcode::ConvertF64U32: + OperationConvert(inst, false, Type::F64, true, inst_values, cache); + break; + default: + break; } } static bool IsSelectInst(Inst* inst) { switch (inst->GetOpcode()) { - case Opcode::SelectU1: - case Opcode::SelectU8: - case Opcode::SelectU16: - case Opcode::SelectU32: - case Opcode::SelectU64: - case Opcode::SelectF32: - case Opcode::SelectF64: - return true; - default: - return false; + case Opcode::SelectU1: + case Opcode::SelectU8: + case Opcode::SelectU16: + case Opcode::SelectU32: + case Opcode::SelectU64: + case Opcode::SelectF32: + case Opcode::SelectF64: + return true; + default: + return false; } } @@ 
-674,7 +757,8 @@ void ComputeImmValues(const Value& value, ImmValueList& values, ComputeImmValues for (size_t i = 0; i < inst->NumArgs(); ++i) { ComputeImmValues(inst->Arg(i), inst_values, cache); } - } if (IsSelectInst(inst)) { + } + if (IsSelectInst(inst)) { ComputeImmValues(inst->Arg(1), inst_values, cache); ComputeImmValues(inst->Arg(2), inst_values, cache); } else { diff --git a/src/shader_recompiler/ir/compute_value/compute.h b/src/shader_recompiler/ir/compute_value/compute.h index fbfe46575..8b6e7b86b 100644 --- a/src/shader_recompiler/ir/compute_value/compute.h +++ b/src/shader_recompiler/ir/compute_value/compute.h @@ -3,11 +3,15 @@ #pragma once -#include #include +#include #include "shader_recompiler/ir/compute_value/imm_value.h" #include "shader_recompiler/ir/value.h" +// Given a value (immediate or not), compute all the possible immediate values +// that it can represent. If the value can't be computed statically, the list +// will be empty. + namespace Shader::IR { using ImmValueList = boost::container::flat_set<ImmValue>; diff --git a/src/shader_recompiler/ir/compute_value/imm_value.cpp b/src/shader_recompiler/ir/compute_value/imm_value.cpp index 068069d2e..e94533e57 100644 --- a/src/shader_recompiler/ir/compute_value/imm_value.cpp +++ b/src/shader_recompiler/ir/compute_value/imm_value.cpp @@ -180,17 +180,20 @@ ImmValue::ImmValue(const ImmValue& value1, const ImmValue& value2) noexcept ImmValue::ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3) noexcept : type{value1.type}, is_signed{value1.is_signed} { - ASSERT(value1.type == value2.type && value1.type == value3.type && value1.is_signed == value2.is_signed && - value1.is_signed == value3.is_signed && value1.Dimensions() == 1); + ASSERT(value1.type == value2.type && value1.type == value3.type && + value1.is_signed == value2.is_signed && value1.is_signed == value3.is_signed && + value1.Dimensions() == 1); imm_values[0] = value1.imm_values[0]; imm_values[1] = value2.imm_values[0]; imm_values[2] = value3.imm_values[0]; } -ImmValue::ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3, const ImmValue& value4) noexcept +ImmValue::ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3, + const ImmValue& value4) noexcept : type{value1.type}, is_signed{value1.is_signed} { - ASSERT(value1.type == value2.type && value1.type == value3.type && value1.type == value4.type && value1.is_signed == value2.is_signed && - value1.is_signed == value3.is_signed && value1.is_signed == value4.is_signed && value1.Dimensions() == 1); + ASSERT(value1.type == value2.type && value1.type == value3.type && value1.type == value4.type && + value1.is_signed == value2.is_signed && value1.is_signed == value3.is_signed && + value1.is_signed == value4.is_signed && value1.Dimensions() == 1); imm_values[0] = value1.imm_values[0]; imm_values[1] = value2.imm_values[0]; imm_values[2] = value3.imm_values[0]; @@ -280,12 +283,12 @@ ImmValue ImmValue::Convert(IR::Type new_type, bool new_signed) const noexcept { case Type::U32: { if (new_signed) { switch (type) { - case Type::F32: - return ImmValue(static_cast<s32>(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(static_cast<s32>(imm_values[0].imm_f64)); - default: - break; + case Type::F32: + return ImmValue(static_cast<s32>(imm_values[0].imm_f32)); + case Type::F64: + return ImmValue(static_cast<s32>(imm_values[0].imm_f64)); + default: + break; } } else { switch (type) { @@ -884,21 +887,17 @@ ImmValue ImmValue::operator<<(const ImmU32& other) const noexcept {
case Type::U1: return ImmValue(imm_values[0].imm_u1 << other.imm_values[0].imm_u1); case Type::U8: - return is_signed - ? ImmValue(imm_values[0].imm_s8 << other.imm_values[0].imm_s8) - : ImmValue(imm_values[0].imm_u8 << other.imm_values[0].imm_u8); + return is_signed ? ImmValue(imm_values[0].imm_s8 << other.imm_values[0].imm_s8) + : ImmValue(imm_values[0].imm_u8 << other.imm_values[0].imm_u8); case Type::U16: - return is_signed - ? ImmValue(imm_values[0].imm_s16 << other.imm_values[0].imm_s16) - : ImmValue(imm_values[0].imm_u16 << other.imm_values[0].imm_u16); + return is_signed ? ImmValue(imm_values[0].imm_s16 << other.imm_values[0].imm_s16) + : ImmValue(imm_values[0].imm_u16 << other.imm_values[0].imm_u16); case Type::U32: - return is_signed - ? ImmValue(imm_values[0].imm_s32 << other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 << other.imm_values[0].imm_u32); + return is_signed ? ImmValue(imm_values[0].imm_s32 << other.imm_values[0].imm_s32) + : ImmValue(imm_values[0].imm_u32 << other.imm_values[0].imm_u32); case Type::U64: - return is_signed - ? ImmValue(imm_values[0].imm_s64 << other.imm_values[0].imm_s64) - : ImmValue(imm_values[0].imm_u64 << other.imm_values[0].imm_u64); + return is_signed ? ImmValue(imm_values[0].imm_s64 << other.imm_values[0].imm_s64) + : ImmValue(imm_values[0].imm_u64 << other.imm_values[0].imm_u64); default: UNREACHABLE_MSG("Invalid type {}", type); } @@ -909,21 +908,17 @@ ImmValue ImmValue::operator>>(const ImmU32& other) const noexcept { case Type::U1: return ImmValue(imm_values[0].imm_u1 >> other.imm_values[0].imm_u1); case Type::U8: - return is_signed - ? ImmValue(imm_values[0].imm_s8 >> other.imm_values[0].imm_s8) - : ImmValue(imm_values[0].imm_u8 >> other.imm_values[0].imm_u8); + return is_signed ? ImmValue(imm_values[0].imm_s8 >> other.imm_values[0].imm_s8) + : ImmValue(imm_values[0].imm_u8 >> other.imm_values[0].imm_u8); case Type::U16: - return is_signed - ? ImmValue(imm_values[0].imm_s16 >> other.imm_values[0].imm_s16) - : ImmValue(imm_values[0].imm_u16 >> other.imm_values[0].imm_u16); + return is_signed ? ImmValue(imm_values[0].imm_s16 >> other.imm_values[0].imm_s16) + : ImmValue(imm_values[0].imm_u16 >> other.imm_values[0].imm_u16); case Type::U32: - return is_signed - ? ImmValue(imm_values[0].imm_s32 >> other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 >> other.imm_values[0].imm_u32); + return is_signed ? ImmValue(imm_values[0].imm_s32 >> other.imm_values[0].imm_s32) + : ImmValue(imm_values[0].imm_u32 >> other.imm_values[0].imm_u32); case Type::U64: - return is_signed - ? ImmValue(imm_values[0].imm_s64 >> other.imm_values[0].imm_s64) - : ImmValue(imm_values[0].imm_u64 >> other.imm_values[0].imm_u64); + return is_signed ? 
ImmValue(imm_values[0].imm_s64 >> other.imm_values[0].imm_s64) + : ImmValue(imm_values[0].imm_u64 >> other.imm_values[0].imm_u64); default: UNREACHABLE_MSG("Invalid type {}", type); } diff --git a/src/shader_recompiler/ir/compute_value/imm_value.h b/src/shader_recompiler/ir/compute_value/imm_value.h index 9ffefc382..74b9d39b7 100644 --- a/src/shader_recompiler/ir/compute_value/imm_value.h +++ b/src/shader_recompiler/ir/compute_value/imm_value.h @@ -62,10 +62,10 @@ using ImmS32xAny = TypedImmValue; using ImmF64xAny = TypedImmValue; using ImmS32F32xAny = TypedImmValue; using ImmF32F64xAny = TypedImmValue; class ImmValue { @@ -98,7 +98,8 @@ public: ImmValue(f64 value1, f64 value2, f64 value3, f64 value4) noexcept; ImmValue(const ImmValue& value1, const ImmValue& value2) noexcept; ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3) noexcept; - ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3, const ImmValue& value4) noexcept; + ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3, + const ImmValue& value4) noexcept; [[nodiscard]] bool IsEmpty() const noexcept; [[nodiscard]] IR::Type Type() const noexcept; @@ -196,10 +197,12 @@ public: [[nodiscard]] ImmValue trunc() const noexcept; [[nodiscard]] ImmValue fract() const noexcept; [[nodiscard]] bool isnan() const noexcept; - - [[nodiscard]] static ImmValue fma(const ImmF32F64& a, const ImmF32F64& b, const ImmF32F64& c) noexcept; - + + [[nodiscard]] static ImmValue fma(const ImmF32F64& a, const ImmF32F64& b, + const ImmF32F64& c) noexcept; + static bool IsSupportedValue(const IR::Value& value) noexcept; + private: union Value { bool imm_u1; From 301f51e57d35be5274a049154f66fee3ce57553c Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 19 Mar 2025 23:55:32 +0100 Subject: [PATCH 22/49] ComputeValue rewrite --- CMakeLists.txt | 16 + src/common/cartesian_invoke.h | 43 + src/common/func_traits.h | 1 + .../ir/compute_value/compute.cpp | 747 +----- .../ir/compute_value/compute.h | 10 +- .../ir/compute_value/do_bitcast.cpp | 32 + .../ir/compute_value/do_bitcast.h | 17 + .../ir/compute_value/do_composite.cpp | 230 ++ .../ir/compute_value/do_composite.h | 64 + .../ir/compute_value/do_convert.cpp | 81 + .../ir/compute_value/do_convert.h | 25 + .../ir/compute_value/do_float_operations.cpp | 254 ++ .../ir/compute_value/do_float_operations.h | 62 + .../compute_value/do_integer_operations.cpp | 233 ++ .../ir/compute_value/do_integer_operations.h | 70 + .../compute_value/do_logical_operations.cpp | 29 + .../ir/compute_value/do_logical_operations.h | 15 + .../ir/compute_value/do_nop_functions.h | 210 ++ .../ir/compute_value/do_packing.cpp | 132 + .../ir/compute_value/do_packing.h | 42 + .../ir/compute_value/imm_value.cpp | 2240 ++++++++--------- .../ir/compute_value/imm_value.h | 377 ++- 22 files changed, 2739 insertions(+), 2191 deletions(-) create mode 100644 src/common/cartesian_invoke.h create mode 100644 src/shader_recompiler/ir/compute_value/do_bitcast.cpp create mode 100644 src/shader_recompiler/ir/compute_value/do_bitcast.h create mode 100644 src/shader_recompiler/ir/compute_value/do_composite.cpp create mode 100644 src/shader_recompiler/ir/compute_value/do_composite.h create mode 100644 src/shader_recompiler/ir/compute_value/do_convert.cpp create mode 100644 src/shader_recompiler/ir/compute_value/do_convert.h create mode 100644 src/shader_recompiler/ir/compute_value/do_float_operations.cpp create mode 100644 
src/shader_recompiler/ir/compute_value/do_float_operations.h create mode 100644 src/shader_recompiler/ir/compute_value/do_integer_operations.cpp create mode 100644 src/shader_recompiler/ir/compute_value/do_integer_operations.h create mode 100644 src/shader_recompiler/ir/compute_value/do_logical_operations.cpp create mode 100644 src/shader_recompiler/ir/compute_value/do_logical_operations.h create mode 100644 src/shader_recompiler/ir/compute_value/do_nop_functions.h create mode 100644 src/shader_recompiler/ir/compute_value/do_packing.cpp create mode 100644 src/shader_recompiler/ir/compute_value/do_packing.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b723f76c..26c03c301 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -630,6 +630,7 @@ set(COMMON src/common/logging/backend.cpp src/common/assert.h src/common/bit_field.h src/common/bounded_threadsafe_queue.h + src/common/cartesian_invoke.h src/common/concepts.h src/common/config.cpp src/common/config.h @@ -845,6 +846,21 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp src/shader_recompiler/ir/compute_value/compute.cpp src/shader_recompiler/ir/compute_value/compute.h + src/shader_recompiler/ir/compute_value/do_bitcast.cpp + src/shader_recompiler/ir/compute_value/do_bitcast.h + src/shader_recompiler/ir/compute_value/do_composite.cpp + src/shader_recompiler/ir/compute_value/do_composite.h + src/shader_recompiler/ir/compute_value/do_convert.cpp + src/shader_recompiler/ir/compute_value/do_convert.h + src/shader_recompiler/ir/compute_value/do_float_operations.cpp + src/shader_recompiler/ir/compute_value/do_float_operations.h + src/shader_recompiler/ir/compute_value/do_integer_operations.cpp + src/shader_recompiler/ir/compute_value/do_integer_operations.h + src/shader_recompiler/ir/compute_value/do_logical_operations.cpp + src/shader_recompiler/ir/compute_value/do_logical_operations.h + src/shader_recompiler/ir/compute_value/do_nop_functions.h + src/shader_recompiler/ir/compute_value/do_packing.cpp + src/shader_recompiler/ir/compute_value/do_packing.h src/shader_recompiler/ir/compute_value/imm_value.cpp src/shader_recompiler/ir/compute_value/imm_value.h src/shader_recompiler/ir/abstract_syntax_list.cpp diff --git a/src/common/cartesian_invoke.h b/src/common/cartesian_invoke.h new file mode 100644 index 000000000..6bbc4cd4a --- /dev/null +++ b/src/common/cartesian_invoke.h @@ -0,0 +1,43 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +namespace Common { +namespace Detail { + +template +void CartesianInvokeImpl(Func func, OutputIt out_it, + std::tuple& arglists_its, + const std::tuple& arglists_tuple) { + if constexpr (Level == N) { + auto get_tuple = [&](std::index_sequence) { + return std::forward_as_tuple(*std::get(arglists_its)...); + }; + *out_it++ = std::move(std::apply(func, get_tuple(std::make_index_sequence{}))); + return; + } else { + const auto& arglist = std::get(arglists_tuple); + for (auto it = arglist.begin(); it != arglist.end(); ++it) { + std::get(arglists_its) = it; + CartesianInvokeImpl( + func, out_it, arglists_its, arglists_tuple); + } + } +} + +} // namespace Detail + +template +void CartesianInvoke(Func func, OutputIt out_it, const ArgLists&... 
+template <typename Func, typename OutputIt, typename... ArgLists>
+void CartesianInvoke(Func func, OutputIt out_it, const ArgLists&... arg_lists) {
+    constexpr std::size_t N = sizeof...(ArgLists);
+    const std::tuple<const ArgLists&...> arglists_tuple = std::forward_as_tuple(arg_lists...);
+
+    std::tuple<typename ArgLists::const_iterator...> arglists_it;
+    Detail::CartesianInvokeImpl<Func, OutputIt, 0, N, ArgLists...>(func, out_it, arglists_it,
+                                                                   arglists_tuple);
+}
+
+} // namespace Common
diff --git a/src/common/func_traits.h b/src/common/func_traits.h
index 407b2dbe6..c3035d7cc 100644
--- a/src/common/func_traits.h
+++ b/src/common/func_traits.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include
+#include
 
 namespace Common {
 
diff --git a/src/shader_recompiler/ir/compute_value/compute.cpp b/src/shader_recompiler/ir/compute_value/compute.cpp
index 7ba40d5bb..ad01ae799 100644
--- a/src/shader_recompiler/ir/compute_value/compute.cpp
+++ b/src/shader_recompiler/ir/compute_value/compute.cpp
@@ -2,724 +2,51 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include
-#include
-#include
-#include
+#include "common/cartesian_invoke.h"
+#include "common/func_traits.h"
 #include "shader_recompiler/ir/compute_value/compute.h"
+#include "shader_recompiler/ir/compute_value/do_bitcast.h"
+#include "shader_recompiler/ir/compute_value/do_composite.h"
+#include "shader_recompiler/ir/compute_value/do_convert.h"
+#include "shader_recompiler/ir/compute_value/do_float_operations.h"
+#include "shader_recompiler/ir/compute_value/do_integer_operations.h"
+#include "shader_recompiler/ir/compute_value/do_logical_operations.h"
+#include "shader_recompiler/ir/compute_value/do_nop_functions.h"
+#include "shader_recompiler/ir/compute_value/do_packing.h"
 
-namespace Shader::IR {
+namespace Shader::IR::ComputeValue {
 
-template <typename Func, typename OutputIt, std::size_t Level, std::size_t N,
-          typename... ArgLists>
-static void CartesianInvokeImpl(Func func, OutputIt out_it,
-                                std::tuple<typename ArgLists::const_iterator...>& arglists_its,
-                                const std::tuple<const ArgLists&...>& arglists_tuple) {
-    if constexpr (Level == N) {
-        auto get_tuple = [&]<std::size_t... I>(std::index_sequence<I...>) {
-            return std::forward_as_tuple(*std::get<I>(arglists_its)...);
-        };
-        *out_it++ = std::move(std::apply(func, get_tuple(std::make_index_sequence<N>{})));
-        return;
-    } else {
-        const auto& arglist = std::get<Level>(arglists_tuple);
-        for (auto it = arglist.begin(); it != arglist.end(); ++it) {
-            std::get<Level>(arglists_its) = it;
-            CartesianInvokeImpl<Func, OutputIt, Level + 1, N, ArgLists...>(
-                func, out_it, arglists_its, arglists_tuple);
-        }
+template <auto func, std::size_t... I>
+static void Invoke(ImmValueList& inst_values, const std::array<ImmValueList, sizeof...(I)>& args,
+                   std::index_sequence<I...>) {
+    func(inst_values, args[I]...);
+}
+
+template <auto func>
+static void Invoke(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) {
+    using Traits = Common::FuncTraits<decltype(func)>;
+    constexpr size_t num_args = Traits::NUM_ARGS - 1;
+    ASSERT(inst->NumArgs() >= num_args);
+    std::array<ImmValueList, num_args> args{};
+    for (size_t i = 0; i < num_args; ++i) {
+        Compute(inst->Arg(i), args[i], cache);
     }
-}
-
-template <typename Func, typename OutputIt, typename... ArgLists>
-static void CartesianInvoke(Func func, OutputIt out_it, const ArgLists&... 
arg_lists) { - constexpr size_t N = sizeof...(ArgLists); - const std::tuple arglists_tuple = std::forward_as_tuple(arg_lists...); - - std::tuple arglists_it; - CartesianInvokeImpl(func, out_it, arglists_it, - arglists_tuple); -} - -static void SetSigned(ImmValueList& values, bool is_signed) { - for (auto& value : values) { - value.SetSigned(is_signed); - } -} - -static void OperationAbs(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.abs(); }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationAdd(Inst* inst, bool is_signed, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b) { return a + b; }; - - SetSigned(args0, is_signed); - SetSigned(args1, is_signed); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); -} - -static void OperationSub(Inst* inst, bool is_signed, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b) { return a - b; }; - - SetSigned(args0, is_signed); - SetSigned(args1, is_signed); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); -} - -static void OperationFma(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args0, args1, args2; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - ComputeImmValues(inst->Arg(2), args2, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c) { - return ImmValue::fma(ImmF32F64(a), ImmF32F64(b), ImmF32F64(c)); - }; - - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, args2); -} - -static void OperationMin(Inst* inst, bool is_signed, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1, is_legacy_args; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - if (inst->NumArgs() > 2) { - ComputeImmValues(inst->Arg(2), is_legacy_args, cache); - } else { - is_legacy_args.insert(ImmValue(false)); - } - - const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& is_legacy) { - if (is_legacy.U1()) { - if (a.isnan()) - return b; - if (b.isnan()) - return a; - } - return std::min(a, b); - }; - - SetSigned(args0, is_signed); - SetSigned(args1, is_signed); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, - is_legacy_args); -} - -static void OperationMax(Inst* inst, bool is_signed, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1, is_legacy_args; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - if (inst->NumArgs() > 2) { - ComputeImmValues(inst->Arg(2), is_legacy_args, cache); - } else { - is_legacy_args.insert(ImmValue(false)); - } - - const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& is_legacy) { - if (is_legacy.U1()) { - if (a.isnan()) - return b; - if (b.isnan()) - return a; - } - return std::max(a, b); 
- }; - - SetSigned(args0, is_signed); - SetSigned(args1, is_signed); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, - is_legacy_args); -} - -static void OperationMul(Inst* inst, bool is_signed, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b) { return a * b; }; - - SetSigned(args0, is_signed); - SetSigned(args1, is_signed); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); -} - -static void OperationDiv(Inst* inst, bool is_signed, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b) { return a / b; }; - - SetSigned(args0, is_signed); - SetSigned(args1, is_signed); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); -} - -static void OperationMod(Inst* inst, bool is_signed, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b) { return a % b; }; - - SetSigned(args0, is_signed); - SetSigned(args1, is_signed); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); -} - -static void OperationNeg(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return -a; }; - - SetSigned(args, true); - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationRecip(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.recip(); }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationRecipSqrt(Inst* inst, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.rsqrt(); }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationSqrt(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.sqrt(); }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationSin(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.sin(); }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationExp2(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.exp2(); }; - - 
std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationLdexp(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args0, args1; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b) { return a.ldexp(ImmU32(b)); }; - - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); -} - -static void OperationCos(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.cos(); }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationLog2(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.log2(); }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationClamp(Inst* inst, bool is_signed, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1, args2; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - ComputeImmValues(inst->Arg(2), args2, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c) { - return a.clamp(b, c); - }; - - SetSigned(args0, is_signed); - SetSigned(args1, is_signed); - SetSigned(args2, is_signed); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, args2); -} - -static void OperationRound(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.round(); }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationFloor(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.floor(); }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationCeil(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.ceil(); }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationTrunc(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.trunc(); }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationFract(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return a.fract(); }; - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationShiftLeft(Inst* inst, ImmValueList& inst_values, - ComputeImmValuesCache& 
cache) { - ImmValueList args0, args1; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b) { return a << ImmU32(b); }; - - SetSigned(args1, false); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); -} - -static void OperationShiftRight(Inst* inst, bool is_signed, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b) { return a >> ImmU32(b); }; - - SetSigned(args0, is_signed); - SetSigned(args1, false); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); -} - -static void OperationBitwiseNot(Inst* inst, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [](const ImmValue& a) { return ~a; }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationBitwiseAnd(Inst* inst, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b) { return a & b; }; - - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); -} - -static void OperationBitwiseOr(Inst* inst, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b) { return a | b; }; - - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); -} - -static void OperationBitwiseXor(Inst* inst, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b) { return a ^ b; }; - - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); -} - -static void OperationConvert(Inst* inst, bool is_signed, Type new_type, bool new_signed, - ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [new_type, new_signed](const ImmValue& a) { - return a.Convert(new_type, new_signed); - }; - - SetSigned(args, is_signed); - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -static void OperationBitCast(Inst* inst, Type new_type, bool new_signed, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args; - ComputeImmValues(inst->Arg(0), args, cache); - - const auto op = [new_type, new_signed](const ImmValue& a) { - return a.Bitcast(new_type, new_signed); - }; - - std::transform(args.begin(), args.end(), std::inserter(inst_values, inst_values.begin()), op); -} - -template -static void OperationCompositeConstruct(Inst* inst, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - std::array args; - for (size_t i = 0; i < N; ++i) { - ComputeImmValues(inst->Arg(i), args[i], cache); - } - - const auto op = [](const Args&... 
args) { return ImmValue(args...); }; - - const auto call_cartesian = [&](std::index_sequence) { - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args[I]...); - }; - call_cartesian(std::make_index_sequence{}); -} - -static void OperationCompositeExtract(Inst* inst, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { - ImmValueList args0, args1; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b) { return a.Extract(ImmU32(b)); }; - - SetSigned(args1, false); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1); -} - -static void OperationInsert(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { - ImmValueList args0, args1, args2; - ComputeImmValues(inst->Arg(0), args0, cache); - ComputeImmValues(inst->Arg(1), args1, cache); - ComputeImmValues(inst->Arg(2), args2, cache); - - const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c) { - return a.Insert(b, ImmU32(c)); - }; - - SetSigned(args2, false); - CartesianInvoke(op, std::inserter(inst_values, inst_values.begin()), args0, args1, args2); + Invoke(inst_values, args, std::make_index_sequence{}); } static void DoInstructionOperation(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { switch (inst->GetOpcode()) { - case Opcode::CompositeConstructU32x2: - case Opcode::CompositeConstructU32x2x2: - case Opcode::CompositeConstructF16x2: - case Opcode::CompositeConstructF32x2: - case Opcode::CompositeConstructF32x2x2: - case Opcode::CompositeConstructF64x2: - OperationCompositeConstruct<2>(inst, inst_values, cache); - break; - case Opcode::CompositeConstructU32x3: - case Opcode::CompositeConstructF16x3: - case Opcode::CompositeConstructF32x3: - case Opcode::CompositeConstructF64x3: - OperationCompositeConstruct<3>(inst, inst_values, cache); - break; - case Opcode::CompositeConstructU32x4: - case Opcode::CompositeConstructF16x4: - case Opcode::CompositeConstructF32x4: - case Opcode::CompositeConstructF64x4: - OperationCompositeConstruct<4>(inst, inst_values, cache); - break; - case Opcode::CompositeExtractU32x2: - case Opcode::CompositeExtractU32x3: - case Opcode::CompositeExtractU32x4: - case Opcode::CompositeExtractF16x2: - case Opcode::CompositeExtractF16x3: - case Opcode::CompositeExtractF16x4: - case Opcode::CompositeExtractF32x2: - case Opcode::CompositeExtractF32x3: - case Opcode::CompositeExtractF32x4: - case Opcode::CompositeExtractF64x2: - case Opcode::CompositeExtractF64x3: - case Opcode::CompositeExtractF64x4: - OperationCompositeExtract(inst, inst_values, cache); - break; - case Opcode::CompositeInsertU32x2: - case Opcode::CompositeInsertU32x3: - case Opcode::CompositeInsertU32x4: - case Opcode::CompositeInsertF16x2: - case Opcode::CompositeInsertF16x3: - case Opcode::CompositeInsertF16x4: - case Opcode::CompositeInsertF32x2: - case Opcode::CompositeInsertF32x3: - case Opcode::CompositeInsertF32x4: - case Opcode::CompositeInsertF64x2: - case Opcode::CompositeInsertF64x3: - case Opcode::CompositeInsertF64x4: - OperationInsert(inst, inst_values, cache); - break; - case Opcode::BitCastU16F16: - OperationBitCast(inst, IR::Type::U16, false, inst_values, cache); - break; - case Opcode::BitCastU32F32: - OperationBitCast(inst, IR::Type::U32, false, inst_values, cache); - break; - case Opcode::BitCastU64F64: - OperationBitCast(inst, IR::Type::U64, false, inst_values, cache); - break; - case Opcode::BitCastF16U16: - 
OperationBitCast(inst, IR::Type::F16, true, inst_values, cache); - break; - case Opcode::BitCastF32U32: - OperationBitCast(inst, IR::Type::F32, true, inst_values, cache); - break; - case Opcode::BitCastF64U64: - OperationBitCast(inst, IR::Type::F64, true, inst_values, cache); - break; - case Opcode::FPAbs32: - case Opcode::FPAbs64: - case Opcode::IAbs32: - OperationAbs(inst, inst_values, cache); - break; - case Opcode::FPAdd32: - case Opcode::FPAdd64: - OperationAdd(inst, true, inst_values, cache); - break; - case Opcode::IAdd32: - case Opcode::IAdd64: - OperationAdd(inst, false, inst_values, cache); - break; - case Opcode::FPSub32: - OperationSub(inst, true, inst_values, cache); - break; - case Opcode::ISub32: - case Opcode::ISub64: - OperationSub(inst, false, inst_values, cache); - break; - case Opcode::FPMul32: - case Opcode::FPMul64: - OperationMul(inst, true, inst_values, cache); - break; - case Opcode::IMul32: - case Opcode::IMul64: - OperationMul(inst, false, inst_values, cache); - break; - case Opcode::FPDiv32: - case Opcode::FPDiv64: - case Opcode::SDiv32: - OperationDiv(inst, true, inst_values, cache); - break; - case Opcode::UDiv32: - OperationDiv(inst, false, inst_values, cache); - break; - case Opcode::SMod32: - OperationMod(inst, true, inst_values, cache); - break; - case Opcode::UMod32: - OperationMod(inst, false, inst_values, cache); - break; - case Opcode::INeg32: - case Opcode::INeg64: - OperationNeg(inst, inst_values, cache); - break; - case Opcode::FPFma32: - case Opcode::FPFma64: - OperationFma(inst, inst_values, cache); - break; - case Opcode::FPMin32: - case Opcode::FPMin64: - case Opcode::SMin32: - OperationMin(inst, true, inst_values, cache); - break; - case Opcode::UMin32: - OperationMin(inst, false, inst_values, cache); - break; - case Opcode::FPMax32: - case Opcode::FPMax64: - case Opcode::SMax32: - OperationMax(inst, true, inst_values, cache); - break; - case Opcode::UMax32: - OperationMax(inst, false, inst_values, cache); - break; - case Opcode::FPNeg32: - case Opcode::FPNeg64: - OperationNeg(inst, inst_values, cache); - break; - case Opcode::FPRecip32: - case Opcode::FPRecip64: - OperationRecip(inst, inst_values, cache); - break; - case Opcode::FPRecipSqrt32: - case Opcode::FPRecipSqrt64: - OperationRecipSqrt(inst, inst_values, cache); - break; - case Opcode::FPSqrt: - OperationSqrt(inst, inst_values, cache); - break; - case Opcode::FPSin: - OperationSin(inst, inst_values, cache); - break; - case Opcode::FPCos: - OperationCos(inst, inst_values, cache); - break; - case Opcode::FPExp2: - OperationExp2(inst, inst_values, cache); - break; - case Opcode::FPLdexp: - OperationLdexp(inst, inst_values, cache); - break; - case Opcode::FPLog2: - OperationLog2(inst, inst_values, cache); - break; - case Opcode::FPClamp32: - case Opcode::FPClamp64: - case Opcode::SClamp32: - OperationClamp(inst, true, inst_values, cache); - break; - case Opcode::UClamp32: - OperationClamp(inst, false, inst_values, cache); - break; - case Opcode::FPRoundEven32: - case Opcode::FPRoundEven64: - OperationRound(inst, inst_values, cache); - break; - case Opcode::FPFloor32: - case Opcode::FPFloor64: - OperationFloor(inst, inst_values, cache); - break; - case Opcode::FPCeil32: - case Opcode::FPCeil64: - OperationCeil(inst, inst_values, cache); - break; - case Opcode::FPTrunc32: - case Opcode::FPTrunc64: - OperationTrunc(inst, inst_values, cache); - break; - case Opcode::FPFract32: - case Opcode::FPFract64: - OperationFract(inst, inst_values, cache); - break; - case Opcode::ShiftLeftLogical32: - 
case Opcode::ShiftLeftLogical64: - OperationShiftLeft(inst, inst_values, cache); - break; - case Opcode::ShiftRightLogical32: - case Opcode::ShiftRightLogical64: - OperationShiftRight(inst, false, inst_values, cache); - break; - case Opcode::ShiftRightArithmetic32: - case Opcode::ShiftRightArithmetic64: - OperationShiftRight(inst, true, inst_values, cache); - break; - case Opcode::BitwiseAnd32: - case Opcode::BitwiseAnd64: - case Opcode::LogicalAnd: - OperationBitwiseAnd(inst, inst_values, cache); - break; - case Opcode::BitwiseOr32: - case Opcode::BitwiseOr64: - case Opcode::LogicalOr: - OperationBitwiseOr(inst, inst_values, cache); - break; - case Opcode::BitwiseXor32: - case Opcode::LogicalXor: - OperationBitwiseXor(inst, inst_values, cache); - break; - case Opcode::BitwiseNot32: - case Opcode::LogicalNot: - OperationBitwiseNot(inst, inst_values, cache); - break; - case Opcode::ConvertU16U32: - OperationConvert(inst, false, Type::U16, false, inst_values, cache); - break; - case Opcode::ConvertS32F32: - case Opcode::ConvertS32F64: - OperationConvert(inst, true, Type::U32, true, inst_values, cache); - break; - case Opcode::ConvertU32F32: - OperationConvert(inst, true, Type::U32, false, inst_values, cache); - break; - case Opcode::ConvertU32U16: - OperationConvert(inst, false, Type::U32, false, inst_values, cache); - break; - case Opcode::ConvertF32F16: - case Opcode::ConvertF32F64: - case Opcode::ConvertF32S32: - OperationConvert(inst, true, Type::F32, true, inst_values, cache); - break; - case Opcode::ConvertF32U32: - OperationConvert(inst, false, Type::F32, true, inst_values, cache); - break; - case Opcode::ConvertF64F32: - case Opcode::ConvertF64S32: - OperationConvert(inst, true, Type::F64, true, inst_values, cache); - break; - case Opcode::ConvertF64U32: - OperationConvert(inst, false, Type::F64, true, inst_values, cache); +#define OPCODE(name, result_type, ...) 
\
+    case Opcode::name:                                                                            \
+        Invoke<&Do##name>(inst, inst_values, cache);                                              \
+        break;
+#include "shader_recompiler/ir/opcodes.inc"
+#undef OPCODE
     default:
+        UNREACHABLE_MSG("Invalid opcode: {}", inst->GetOpcode());
         break;
     }
 }
 
 static bool IsSelectInst(Inst* inst) {
@@ -737,7 +64,7 @@ static bool IsSelectInst(Inst* inst) {
     }
 }
 
-void ComputeImmValues(const Value& value, ImmValueList& values, ComputeImmValuesCache& cache) {
+void Compute(const Value& value, ImmValueList& values, ComputeImmValuesCache& cache) {
     Value resolved = value.Resolve();
     if (ImmValue::IsSupportedValue(resolved)) {
         values.insert(ImmValue(resolved));
@@ -755,16 +82,16 @@ void ComputeImmValues(const Value& value, ImmValueList& values, ComputeImmValues
     auto& inst_values = cache.emplace(inst, ImmValueList{}).first->second;
     if (inst->GetOpcode() == Opcode::Phi) {
         for (size_t i = 0; i < inst->NumArgs(); ++i) {
-            ComputeImmValues(inst->Arg(i), inst_values, cache);
+            Compute(inst->Arg(i), inst_values, cache);
         }
     }
     if (IsSelectInst(inst)) {
-        ComputeImmValues(inst->Arg(1), inst_values, cache);
-        ComputeImmValues(inst->Arg(2), inst_values, cache);
+        Compute(inst->Arg(1), inst_values, cache);
+        Compute(inst->Arg(2), inst_values, cache);
     } else {
         DoInstructionOperation(inst, inst_values, cache);
     }
     values.insert(inst_values.begin(), inst_values.end());
 }
 
-} // namespace Shader::IR
+} // namespace Shader::IR::ComputeValue
\ No newline at end of file
diff --git a/src/shader_recompiler/ir/compute_value/compute.h b/src/shader_recompiler/ir/compute_value/compute.h
index 8b6e7b86b..b98b4ecae 100644
--- a/src/shader_recompiler/ir/compute_value/compute.h
+++ b/src/shader_recompiler/ir/compute_value/compute.h
@@ -3,8 +3,8 @@
 
 #pragma once
 
+#include <unordered_set>
 #include <boost/container/flat_map.hpp>
-#include <boost/container/flat_set.hpp>
 #include "shader_recompiler/ir/compute_value/imm_value.h"
 #include "shader_recompiler/ir/value.h"
 
@@ -12,11 +12,11 @@
 // that can represent. If the value can't be computed statically, the list will
 // be empty.
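+//
+// Usage sketch (editorial illustration; `inst`, `cache`, and `candidates` are
+// hypothetical names): collect the statically-known candidates for an
+// instruction argument, then test whether constant evaluation succeeded:
+//
+//     ComputeImmValuesCache cache;
+//     ImmValueList candidates;
+//     Compute(inst->Arg(0), candidates, cache);
+//     if (candidates.empty()) {
+//         // Not statically computable; fall back to runtime handling.
+//     }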
-namespace Shader::IR {
+namespace Shader::IR::ComputeValue {
 
-using ImmValueList = boost::container::flat_set<ImmValue>;
+using ImmValueList = std::unordered_set<ImmValue>;
 using ComputeImmValuesCache = boost::container::flat_map<Inst*, ImmValueList>;
 
-void ComputeImmValues(const Value& value, ImmValueList& values, ComputeImmValuesCache& cache);
+void Compute(const Value& value, ImmValueList& values, ComputeImmValuesCache& cache);
 
-} // namespace Shader::IR
+} // namespace Shader::IR::ComputeValue
\ No newline at end of file
diff --git a/src/shader_recompiler/ir/compute_value/do_bitcast.cpp b/src/shader_recompiler/ir/compute_value/do_bitcast.cpp
new file mode 100644
index 000000000..2d011bc62
--- /dev/null
+++ b/src/shader_recompiler/ir/compute_value/do_bitcast.cpp
@@ -0,0 +1,32 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/ir/compute_value/do_bitcast.h"
+
+namespace Shader::IR::ComputeValue {
+
+void DoBitCastU16F16(ImmValueList& inst_values, const ImmValueList& src_values) {
+    inst_values.insert(src_values.begin(), src_values.end());
+}
+
+void DoBitCastU32F32(ImmValueList& inst_values, const ImmValueList& src_values) {
+    inst_values.insert(src_values.begin(), src_values.end());
+}
+
+void DoBitCastU64F64(ImmValueList& inst_values, const ImmValueList& src_values) {
+    inst_values.insert(src_values.begin(), src_values.end());
+}
+
+void DoBitCastF16U16(ImmValueList& inst_values, const ImmValueList& src_values) {
+    inst_values.insert(src_values.begin(), src_values.end());
+}
+
+void DoBitCastF32U32(ImmValueList& inst_values, const ImmValueList& src_values) {
+    inst_values.insert(src_values.begin(), src_values.end());
+}
+
+void DoBitCastF64U64(ImmValueList& inst_values, const ImmValueList& src_values) {
+    inst_values.insert(src_values.begin(), src_values.end());
+}
+
+} // namespace Shader::IR::ComputeValue
diff --git a/src/shader_recompiler/ir/compute_value/do_bitcast.h b/src/shader_recompiler/ir/compute_value/do_bitcast.h
new file mode 100644
index 000000000..4f9441612
--- /dev/null
+++ b/src/shader_recompiler/ir/compute_value/do_bitcast.h
@@ -0,0 +1,17 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "shader_recompiler/ir/compute_value/compute.h"
+
+namespace Shader::IR::ComputeValue {
+
+void DoBitCastU16F16(ImmValueList& inst_values, const ImmValueList& src_values);
+void DoBitCastU32F32(ImmValueList& inst_values, const ImmValueList& src_values);
+void DoBitCastU64F64(ImmValueList& inst_values, const ImmValueList& src_values);
+void DoBitCastF16U16(ImmValueList& inst_values, const ImmValueList& src_values);
+void DoBitCastF32U32(ImmValueList& inst_values, const ImmValueList& src_values);
+void DoBitCastF64U64(ImmValueList& inst_values, const ImmValueList& src_values);
+
+} // namespace Shader::IR::ComputeValue
diff --git a/src/shader_recompiler/ir/compute_value/do_composite.cpp b/src/shader_recompiler/ir/compute_value/do_composite.cpp
new file mode 100644
index 000000000..41cbd04f9
--- /dev/null
+++ b/src/shader_recompiler/ir/compute_value/do_composite.cpp
@@ -0,0 +1,230 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/cartesian_invoke.h"
+#include "shader_recompiler/ir/compute_value/do_composite.h"
+
+namespace Shader::IR::ComputeValue {
+
+static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0, const 
ImmValueList& arg1) { + const auto op = [](const ImmValue& a, const ImmValue& b) { + return ImmValue(a, b); + }; + Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1); +} + +static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2) { + const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c) { + return ImmValue(a, b, c); + }; + Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1, arg2); +} + +static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3) { + const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c, const ImmValue& d) { + return ImmValue(a, b, c, d); + }; + Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1, arg2, arg3); +} + +void DoCompositeConstructU32x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { + CommonCompositeConstruct(inst_values, arg0, arg1); +} + +void DoCompositeConstructU32x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2) { + CommonCompositeConstruct(inst_values, arg0, arg1, arg2); +} + +void DoCompositeConstructU32x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3) { + CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3); +} + +void DoCompositeConstructU32x2x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { + Common::CartesianInvoke(ImmValue::CompositeFrom2x2, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1); +} + +void DoCompositeExtractU32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +} + +void DoCompositeExtractU32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +} + +void DoCompositeExtractU32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +} + +void DoCompositeInsertU32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +} + +void DoCompositeInsertU32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +} + +void DoCompositeInsertU32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +} + +void DoCompositeShuffleU32x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoCompositeShuffleU32x3(ImmValueList& 
inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoCompositeShuffleU32x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoCompositeConstructF16x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { + CommonCompositeConstruct(inst_values, arg0, arg1); +} + +void DoCompositeConstructF16x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2) { + CommonCompositeConstruct(inst_values, arg0, arg1, arg2); +} + +void DoCompositeConstructF16x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3) { + CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3); +} + +void DoCompositeConstructF32x2x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { + Common::CartesianInvoke(ImmValue::CompositeFrom2x2, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1); +} + +void DoCompositeExtractF16x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +} + +void DoCompositeExtractF16x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +} + +void DoCompositeExtractF16x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +} + +void DoCompositeInsertF16x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +} + +void DoCompositeInsertF16x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +} + +void DoCompositeInsertF16x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +} + +void DoCompositeShuffleF16x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoCompositeShuffleF16x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoCompositeShuffleF16x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoCompositeConstructF32x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { + 
CommonCompositeConstruct(inst_values, arg0, arg1); +} + +void DoCompositeConstructF32x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2) { + CommonCompositeConstruct(inst_values, arg0, arg1, arg2); +} + +void DoCompositeConstructF32x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3) { + CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3); +} + +void DoCompositeExtractF32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +} + +void DoCompositeExtractF32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +} + +void DoCompositeExtractF32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +} + +void DoCompositeInsertF32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +} + +void DoCompositeInsertF32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +} + +void DoCompositeInsertF32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +} + +void DoCompositeShuffleF32x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoCompositeShuffleF32x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoCompositeShuffleF32x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoCompositeConstructF64x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { + CommonCompositeConstruct(inst_values, arg0, arg1); +} + +void DoCompositeConstructF64x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2) { + CommonCompositeConstruct(inst_values, arg0, arg1, arg2); +} + +void DoCompositeConstructF64x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3) { + CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3); +} + +void DoCompositeExtractF64x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +} + +void DoCompositeExtractF64x3(ImmValueList& inst_values, const ImmValueList& vec, const 
ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +} + +void DoCompositeExtractF64x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +} + +void DoCompositeInsertF64x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +} + +void DoCompositeInsertF64x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +} + +void DoCompositeInsertF64x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +} + +void DoCompositeShuffleF64x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoCompositeShuffleF64x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoCompositeShuffleF64x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3) { + UNREACHABLE_MSG("Unimplemented"); +} + +} // namespace Shader::IR::ComputeValue diff --git a/src/shader_recompiler/ir/compute_value/do_composite.h b/src/shader_recompiler/ir/compute_value/do_composite.h new file mode 100644 index 000000000..a55fd0fd8 --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/do_composite.h @@ -0,0 +1,64 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/ir/compute_value/compute.h" + +namespace Shader::IR::ComputeValue { + +void DoCompositeConstructU32x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1); +void DoCompositeConstructU32x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2); +void DoCompositeConstructU32x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3); +void DoCompositeConstructU32x2x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1); +void DoCompositeExtractU32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); +void DoCompositeExtractU32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); +void DoCompositeExtractU32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); +void DoCompositeInsertU32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertU32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertU32x4(ImmValueList& inst_values, const ImmValueList& vec, const 
ImmValueList& val, const ImmValueList& idx); +void DoCompositeShuffleU32x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1); +void DoCompositeShuffleU32x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2); +void DoCompositeShuffleU32x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3); + +void DoCompositeConstructF16x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1); +void DoCompositeConstructF16x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2); +void DoCompositeConstructF16x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3); +void DoCompositeExtractF16x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); +void DoCompositeExtractF16x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); +void DoCompositeExtractF16x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); +void DoCompositeInsertF16x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertF16x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertF16x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); +void DoCompositeShuffleF16x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1); +void DoCompositeShuffleF16x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2); +void DoCompositeShuffleF16x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3); + +void DoCompositeConstructF32x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1); +void DoCompositeConstructF32x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2); +void DoCompositeConstructF32x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3); +void DoCompositeConstructF32x2x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1); +void DoCompositeExtractF32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); +void DoCompositeExtractF32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); +void DoCompositeExtractF32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); +void DoCompositeInsertF32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertF32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertF32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& 
idx); +void DoCompositeShuffleF32x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1); +void DoCompositeShuffleF32x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2); +void DoCompositeShuffleF32x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3); + +void DoCompositeConstructF64x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1); +void DoCompositeConstructF64x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2); +void DoCompositeConstructF64x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3); +void DoCompositeExtractF64x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); +void DoCompositeExtractF64x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); +void DoCompositeExtractF64x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); +void DoCompositeInsertF64x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertF64x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertF64x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); +void DoCompositeShuffleF64x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1); +void DoCompositeShuffleF64x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2); +void DoCompositeShuffleF64x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3); + +} // namespace Shader::IR::ComputeValue diff --git a/src/shader_recompiler/ir/compute_value/do_convert.cpp b/src/shader_recompiler/ir/compute_value/do_convert.cpp new file mode 100644 index 000000000..aad5d24e4 --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/do_convert.cpp @@ -0,0 +1,81 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/cartesian_invoke.h" +#include "shader_recompiler/ir/compute_value/do_convert.h" + +namespace Shader::IR::ComputeValue { + +void DoConvertS32F32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Convert, + std::insert_iterator(inst_values, inst_values.begin()), args); +} + +void DoConvertS32F64(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Convert, + std::insert_iterator(inst_values, inst_values.begin()), args); +} + +void DoConvertU32F32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Convert, + std::insert_iterator(inst_values, inst_values.begin()), args); +} + +void DoConvertF16F32(ImmValueList& inst_values, const ImmValueList& args) { + // Common::CartesianInvoke(ImmValue::Convert, + // 
std::insert_iterator(inst_values, inst_values.begin()), args); + UNREACHABLE_MSG("F32 to F16 conversion is not implemented"); +} + +void DoConvertF32F16(ImmValueList& inst_values, const ImmValueList& args) { + // Common::CartesianInvoke(ImmValue::Convert, + // std::insert_iterator(inst_values, inst_values.begin()), args); + UNREACHABLE_MSG("F16 to F32 conversion is not implemented"); +} + +void DoConvertF32F64(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Convert, + std::insert_iterator(inst_values, inst_values.begin()), args); +} + +void DoConvertF64F32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Convert, + std::insert_iterator(inst_values, inst_values.begin()), args); +} + +void DoConvertF32S32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Convert, + std::insert_iterator(inst_values, inst_values.begin()), args); +} + +void DoConvertF32U32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Convert, + std::insert_iterator(inst_values, inst_values.begin()), args); +} + +void DoConvertF64S32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Convert, + std::insert_iterator(inst_values, inst_values.begin()), args); +} + +void DoConvertF64U32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Convert, + std::insert_iterator(inst_values, inst_values.begin()), args); +} + +void DoConvertF32U16(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Convert, + std::insert_iterator(inst_values, inst_values.begin()), args); +} + +void DoConvertU16U32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Convert, + std::insert_iterator(inst_values, inst_values.begin()), args); +} + +void DoConvertU32U16(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Convert, + std::insert_iterator(inst_values, inst_values.begin()), args); +} + +} // namespace Shader::IR::ComputeValue \ No newline at end of file diff --git a/src/shader_recompiler/ir/compute_value/do_convert.h b/src/shader_recompiler/ir/compute_value/do_convert.h new file mode 100644 index 000000000..9fd5e0302 --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/do_convert.h @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/ir/compute_value/compute.h" + +namespace Shader::IR::ComputeValue { + +void DoConvertS32F32(ImmValueList& inst_values, const ImmValueList& args); +void DoConvertS32F64(ImmValueList& inst_values, const ImmValueList& args); +void DoConvertU32F32(ImmValueList& inst_values, const ImmValueList& args); +void DoConvertF16F32(ImmValueList& inst_values, const ImmValueList& args); +void DoConvertF32F16(ImmValueList& inst_values, const ImmValueList& args); +void DoConvertF32F64(ImmValueList& inst_values, const ImmValueList& args); +void DoConvertF64F32(ImmValueList& inst_values, const ImmValueList& args); +void DoConvertF32S32(ImmValueList& inst_values, const ImmValueList& args); +void DoConvertF32U32(ImmValueList& inst_values, const ImmValueList& args); +void DoConvertF64S32(ImmValueList& inst_values, const ImmValueList& args); +void DoConvertF64U32(ImmValueList& inst_values, const ImmValueList& args); +void DoConvertF32U16(ImmValueList& 
inst_values, const ImmValueList& args); +void DoConvertU16U32(ImmValueList& inst_values, const ImmValueList& args); +void DoConvertU32U16(ImmValueList& inst_values, const ImmValueList& args); + +} // namespace Shader::IR::ComputeValue \ No newline at end of file diff --git a/src/shader_recompiler/ir/compute_value/do_float_operations.cpp b/src/shader_recompiler/ir/compute_value/do_float_operations.cpp new file mode 100644 index 000000000..9868c2333 --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/do_float_operations.cpp @@ -0,0 +1,254 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/cartesian_invoke.h" +#include "shader_recompiler/ir/compute_value/do_float_operations.h" + +namespace Shader::IR::ComputeValue { + +void DoFPAbs32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Abs, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPAbs64(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Abs, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Add, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoFPAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Add, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoFPSub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Sub, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoFPFma32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, + const ImmValueList& args2) { + Common::CartesianInvoke(ImmValue::Fma, + std::insert_iterator(inst_values, inst_values.end()), args0, args1, + args2); +} + +void DoFPFma64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, + const ImmValueList& args2) { + Common::CartesianInvoke(ImmValue::Fma, + std::insert_iterator(inst_values, inst_values.end()), args0, args1, + args2); +} + +void DoFPMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, + const ImmValueList& args_legacy) { + const auto& op = [](const ImmValue& a, const ImmValue& b, const ImmValue& legacy) { + if (legacy.U1()) { + if (ImmValue::IsNan(a)) + return b; + if (ImmValue::IsNan(b)) + return a; + } + return ImmValue::Max(a, b); + }; + Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.end()), args0, args1, + args_legacy); +} + +void DoFPMax64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Max, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoFPMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, + const ImmValueList& args_legacy) { + const auto& op = [](const ImmValue& a, const ImmValue& b, const ImmValue& legacy) { + if (legacy.U1()) { + if (ImmValue::IsNan(a)) + return b; + if (ImmValue::IsNan(b)) + return a; + } + return ImmValue::Min(a, b); + }; + Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.end()), args0, args1, + args_legacy); +} + +void DoFPMin64(ImmValueList& inst_values, const 
ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Min, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoFPMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Mul, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoFPMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Mul, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoFPDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Div, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoFPDiv64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Div, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoFPNeg32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Neg, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPNeg64(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Neg, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPRecip32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Recip, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPRecip64(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Recip, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPRecipSqrt32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Rsqrt, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPRecipSqrt64(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Rsqrt, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPSqrt(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Sqrt, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPSin(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Sin, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPExp2(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Exp2, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPLdexp(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& exponents) { + Common::CartesianInvoke(ImmValue::Ldexp, + std::insert_iterator(inst_values, inst_values.end()), args, exponents); +} + +void DoFPCos(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Cos, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPLog2(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Log2, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPSaturate32(ImmValueList& inst_values, const ImmValueList& args) { + UNREACHABLE_MSG("FPSaturate32 not implemented"); +} + +void DoFPSaturate64(ImmValueList& inst_values, const ImmValueList& args) { + UNREACHABLE_MSG("FPSaturate64 not implemented"); +} + +void DoFPClamp32(ImmValueList& 
inst_values, const ImmValueList& args, const ImmValueList& mins, + const ImmValueList& maxs) { + Common::CartesianInvoke(ImmValue::Clamp, + std::insert_iterator(inst_values, inst_values.end()), args, mins, maxs); +} + +void DoFPClamp64(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins, + const ImmValueList& maxs) { + Common::CartesianInvoke(ImmValue::Clamp, + std::insert_iterator(inst_values, inst_values.end()), args, mins, maxs); +} + +void DoFPRoundEven32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Round, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPRoundEven64(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Round, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPFloor32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Floor, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPFloor64(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Floor, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPCeil32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Ceil, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPCeil64(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Ceil, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPTrunc32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Trunc, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPTrunc64(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Trunc, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPFract32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Fract, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPFract64(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Fract, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoFPFrexpSig32(ImmValueList& inst_values, const ImmValueList& args) { + UNREACHABLE_MSG("FPFrexpSig32 not implemented"); +} + +void DoFPFrexpSig64(ImmValueList& inst_values, const ImmValueList& args) { + UNREACHABLE_MSG("FPFrexpSig64 not implemented"); +} + +void DoFPFrexpExp32(ImmValueList& inst_values, const ImmValueList& args) { + UNREACHABLE_MSG("FPFrexpExp32 not implemented"); +} + +void DoFPFrexpExp64(ImmValueList& inst_values, const ImmValueList& args) { + UNREACHABLE_MSG("FPFrexpExp64 not implemented"); +} + +} // namespace Shader::IR::ComputeValue \ No newline at end of file diff --git a/src/shader_recompiler/ir/compute_value/do_float_operations.h b/src/shader_recompiler/ir/compute_value/do_float_operations.h new file mode 100644 index 000000000..ffc8040d1 --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/do_float_operations.h @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/ir/compute_value/compute.h" + +namespace Shader::IR::ComputeValue { + +void DoFPAbs32(ImmValueList& inst_values, const ImmValueList& args); +void DoFPAbs64(ImmValueList& inst_values, const 
ImmValueList& args); +void DoFPAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoFPAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoFPSub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoFPFma32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, + const ImmValueList& args2); +void DoFPFma64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, + const ImmValueList& args2); +void DoFPMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, + const ImmValueList& args_legacy); +void DoFPMax64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoFPMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, + const ImmValueList& args_legacy); +void DoFPMin64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoFPMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoFPMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoFPDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoFPDiv64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoFPNeg32(ImmValueList& inst_values, const ImmValueList& args); +void DoFPNeg64(ImmValueList& inst_values, const ImmValueList& args); +void DoFPRecip32(ImmValueList& inst_values, const ImmValueList& args); +void DoFPRecip64(ImmValueList& inst_values, const ImmValueList& args); +void DoFPRecipSqrt32(ImmValueList& inst_values, const ImmValueList& args); +void DoFPRecipSqrt64(ImmValueList& inst_values, const ImmValueList& args); +void DoFPSqrt(ImmValueList& inst_values, const ImmValueList& args); +void DoFPSin(ImmValueList& inst_values, const ImmValueList& args); +void DoFPExp2(ImmValueList& inst_values, const ImmValueList& args); +void DoFPLdexp(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& exponents); +void DoFPCos(ImmValueList& inst_values, const ImmValueList& args); +void DoFPLog2(ImmValueList& inst_values, const ImmValueList& args); +void DoFPSaturate32(ImmValueList& inst_values, const ImmValueList& args); +void DoFPSaturate64(ImmValueList& inst_values, const ImmValueList& args); +void DoFPClamp32(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins, + const ImmValueList& maxs); +void DoFPClamp64(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins, + const ImmValueList& maxs); +void DoFPRoundEven32(ImmValueList& inst_values, const ImmValueList& args); +void DoFPRoundEven64(ImmValueList& inst_values, const ImmValueList& args); +void DoFPFloor32(ImmValueList& inst_values, const ImmValueList& args); +void DoFPFloor64(ImmValueList& inst_values, const ImmValueList& args); +void DoFPCeil32(ImmValueList& inst_values, const ImmValueList& args); +void DoFPCeil64(ImmValueList& inst_values, const ImmValueList& args); +void DoFPTrunc32(ImmValueList& inst_values, const ImmValueList& args); +void DoFPTrunc64(ImmValueList& inst_values, const ImmValueList& args); +void DoFPFract32(ImmValueList& inst_values, const ImmValueList& args); +void DoFPFract64(ImmValueList& inst_values, const ImmValueList& args); +void DoFPFrexpSig32(ImmValueList& inst_values, const ImmValueList& args); +void DoFPFrexpSig64(ImmValueList& inst_values, 
const ImmValueList& args); +void DoFPFrexpExp32(ImmValueList& inst_values, const ImmValueList& args); +void DoFPFrexpExp64(ImmValueList& inst_values, const ImmValueList& args); + +} // namespace Shader::IR::ComputeValue \ No newline at end of file diff --git a/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp b/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp new file mode 100644 index 000000000..bcc101bde --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp @@ -0,0 +1,233 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/cartesian_invoke.h" +#include "shader_recompiler/ir/compute_value/do_integer_operations.h" + +namespace Shader::IR::ComputeValue { + +void DoIAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Add, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoIAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Add, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoIAddCary32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + UNREACHABLE_MSG("IAddCary32 not implemented"); +} + +void DoISub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Sub, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoISub64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Sub, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoIMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Mul, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoIMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Mul, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoSMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + UNREACHABLE_MSG("SMulExt not implemented"); +} + +void DoUMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + UNREACHABLE_MSG("UMulExt not implemented"); +} + +void DoSDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Div, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoUDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Div, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoSMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Mod, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoUMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Mod, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoINeg32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Neg, + 
std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoINeg64(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Neg, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoIAbs32(ImmValueList& inst_values, const ImmValueList& args) { + Common::CartesianInvoke(ImmValue::Abs, + std::insert_iterator(inst_values, inst_values.end()), args); +} + +void DoShiftLeftLogical32(ImmValueList& inst_values, const ImmValueList& args, + const ImmValueList& shift) { + Common::CartesianInvoke(ImmValue::LShift, + std::insert_iterator(inst_values, inst_values.end()), args, shift); +} + +void DoShiftLeftLogical64(ImmValueList& inst_values, const ImmValueList& args, + const ImmValueList& shift) { + Common::CartesianInvoke(ImmValue::LShift, + std::insert_iterator(inst_values, inst_values.end()), args, shift); +} + +void DoShiftRightLogical32(ImmValueList& inst_values, const ImmValueList& args, + const ImmValueList& shift) { + Common::CartesianInvoke(ImmValue::RShift, + std::insert_iterator(inst_values, inst_values.end()), args, shift); +} + +void DoShiftRightLogical64(ImmValueList& inst_values, const ImmValueList& args, + const ImmValueList& shift) { + Common::CartesianInvoke(ImmValue::RShift, + std::insert_iterator(inst_values, inst_values.end()), args, shift); +} + +void DoShiftRightArithmetic32(ImmValueList& inst_values, const ImmValueList& args, + const ImmValueList& shift) { + Common::CartesianInvoke(ImmValue::RShift, + std::insert_iterator(inst_values, inst_values.end()), args, shift); +} + +void DoShiftRightArithmetic64(ImmValueList& inst_values, const ImmValueList& args, + const ImmValueList& shift) { + Common::CartesianInvoke(ImmValue::RShift, + std::insert_iterator(inst_values, inst_values.end()), args, shift); +} + +void DoBitwiseAnd32(ImmValueList& inst_values, const ImmValueList& args0, + const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::And, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoBitwiseAnd64(ImmValueList& inst_values, const ImmValueList& args0, + const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::And, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoBitwiseOr32(ImmValueList& inst_values, const ImmValueList& args0, + const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Or, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoBitwiseOr64(ImmValueList& inst_values, const ImmValueList& args0, + const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Or, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoBitwiseXor32(ImmValueList& inst_values, const ImmValueList& args0, + const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Xor, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoBitFieldInsert(ImmValueList& inst_values, const ImmValueList& arg, + const ImmValueList& insert, const ImmValueList& offset, + const ImmValueList& count) { + UNREACHABLE_MSG("BitFieldInsert not implemented"); +} + +void DoBitFieldSExtract(ImmValueList& inst_values, const ImmValueList& arg, + const ImmValueList& offset, const ImmValueList& count) { + UNREACHABLE_MSG("BitFieldSExtract not implemented"); +} + +void DoBitFieldUExtract(ImmValueList& inst_values, const ImmValueList& arg, + const ImmValueList& offset, const ImmValueList& count) { + UNREACHABLE_MSG("BitFieldUExtract not implemented"); +} + +void 
DoBitReverse32(ImmValueList& inst_values, const ImmValueList& arg) { + UNREACHABLE_MSG("BitReverse32 not implemented"); +} + +void DoBitCount32(ImmValueList& inst_values, const ImmValueList& arg) { + UNREACHABLE_MSG("BitCount32 not implemented"); +} + +void DoBitCount64(ImmValueList& inst_values, const ImmValueList& arg) { + UNREACHABLE_MSG("BitCount64 not implemented"); +} + +void DoBitwiseNot32(ImmValueList& inst_values, const ImmValueList& arg) { + Common::CartesianInvoke(ImmValue::Not, + std::insert_iterator(inst_values, inst_values.end()), arg); +} + +void DoFindSMsb32(ImmValueList& inst_values, const ImmValueList& arg) { + UNREACHABLE_MSG("FindSMsb32 not implemented"); +} + +void DoFindUMsb32(ImmValueList& inst_values, const ImmValueList& arg) { + UNREACHABLE_MSG("FindUMsb32 not implemented"); +} + +void DoFindILsb32(ImmValueList& inst_values, const ImmValueList& arg) { + UNREACHABLE_MSG("FindILsb32 not implemented"); +} + +void DoFindILsb64(ImmValueList& inst_values, const ImmValueList& arg) { + UNREACHABLE_MSG("FindILsb64 not implemented"); +} + +void DoSMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Min, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoUMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Min, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoSMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Max, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoUMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { + Common::CartesianInvoke(ImmValue::Max, + std::insert_iterator(inst_values, inst_values.end()), args0, args1); +} + +void DoSClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min, + const ImmValueList& max) { + Common::CartesianInvoke(ImmValue::Clamp, + std::insert_iterator(inst_values, inst_values.end()), value, min, max); +} + +void DoUClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min, + const ImmValueList& max) { + Common::CartesianInvoke(ImmValue::Clamp, + std::insert_iterator(inst_values, inst_values.end()), value, min, max); +} + +} // namespace Shader::IR::ComputeValue \ No newline at end of file diff --git a/src/shader_recompiler/ir/compute_value/do_integer_operations.h b/src/shader_recompiler/ir/compute_value/do_integer_operations.h new file mode 100644 index 000000000..e698f2b12 --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/do_integer_operations.h @@ -0,0 +1,70 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/ir/compute_value/compute.h" + +namespace Shader::IR::ComputeValue { + +void DoIAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoIAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoIAddCary32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoISub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoISub64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoIMul32(ImmValueList& inst_values, const 
ImmValueList& args0, const ImmValueList& args1); +void DoIMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoSMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoUMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoSDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoUDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoSMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoUMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoINeg32(ImmValueList& inst_values, const ImmValueList& args); +void DoINeg64(ImmValueList& inst_values, const ImmValueList& args); +void DoIAbs32(ImmValueList& inst_values, const ImmValueList& args); +void DoShiftLeftLogical32(ImmValueList& inst_values, const ImmValueList& args, + const ImmValueList& shift); +void DoShiftLeftLogical64(ImmValueList& inst_values, const ImmValueList& args, + const ImmValueList& shift); +void DoShiftRightLogical32(ImmValueList& inst_values, const ImmValueList& args, + const ImmValueList& shift); +void DoShiftRightLogical64(ImmValueList& inst_values, const ImmValueList& args, + const ImmValueList& shift); +void DoShiftRightArithmetic32(ImmValueList& inst_values, const ImmValueList& args, + const ImmValueList& shift); +void DoShiftRightArithmetic64(ImmValueList& inst_values, const ImmValueList& args, + const ImmValueList& shift); +void DoBitwiseAnd32(ImmValueList& inst_values, const ImmValueList& args0, + const ImmValueList& args1); +void DoBitwiseAnd64(ImmValueList& inst_values, const ImmValueList& args0, + const ImmValueList& args1); +void DoBitwiseOr32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoBitwiseOr64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoBitwiseXor32(ImmValueList& inst_values, const ImmValueList& args0, + const ImmValueList& args1); +void DoBitFieldInsert(ImmValueList& inst_values, const ImmValueList& arg, + const ImmValueList& insert, const ImmValueList& offset, + const ImmValueList& count); +void DoBitFieldSExtract(ImmValueList& inst_values, const ImmValueList& arg, + const ImmValueList& offset, const ImmValueList& count); +void DoBitFieldUExtract(ImmValueList& inst_values, const ImmValueList& arg, + const ImmValueList& offset, const ImmValueList& count); +void DoBitReverse32(ImmValueList& inst_values, const ImmValueList& arg); +void DoBitCount32(ImmValueList& inst_values, const ImmValueList& arg); +void DoBitCount64(ImmValueList& inst_values, const ImmValueList& arg); +void DoBitwiseNot32(ImmValueList& inst_values, const ImmValueList& arg); +void DoFindSMsb32(ImmValueList& inst_values, const ImmValueList& arg); +void DoFindUMsb32(ImmValueList& inst_values, const ImmValueList& arg); +void DoFindILsb32(ImmValueList& inst_values, const ImmValueList& arg); +void DoFindILsb64(ImmValueList& inst_values, const ImmValueList& arg); +void DoSMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoUMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoSMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void DoUMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1); +void 
DoSClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min, + const ImmValueList& max); +void DoUClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min, + const ImmValueList& max); + +} // namespace Shader::IR::ComputeValue \ No newline at end of file diff --git a/src/shader_recompiler/ir/compute_value/do_logical_operations.cpp b/src/shader_recompiler/ir/compute_value/do_logical_operations.cpp new file mode 100644 index 000000000..8b494aafa --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/do_logical_operations.cpp @@ -0,0 +1,29 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/cartesian_invoke.h" +#include "shader_recompiler/ir/compute_value/do_logical_operations.h" + +namespace Shader::IR::ComputeValue { + +void DoLogicalOr(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2) { + Common::CartesianInvoke(ImmValue::Or, + std::insert_iterator(inst_values, inst_values.end()), arg1, arg2); +} + +void DoLogicalAnd(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2) { + Common::CartesianInvoke(ImmValue::And, + std::insert_iterator(inst_values, inst_values.end()), arg1, arg2); +} + +void DoLogicalXor(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2) { + Common::CartesianInvoke(ImmValue::Xor, + std::insert_iterator(inst_values, inst_values.end()), arg1, arg2); +} + +void DoLogicalNot(ImmValueList& inst_values, const ImmValueList& arg1) { + Common::CartesianInvoke(ImmValue::Not, + std::insert_iterator(inst_values, inst_values.end()), arg1); +} + +} // namespace Shader::IR::ComputeValue \ No newline at end of file diff --git a/src/shader_recompiler/ir/compute_value/do_logical_operations.h b/src/shader_recompiler/ir/compute_value/do_logical_operations.h new file mode 100644 index 000000000..1e2b3dca2 --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/do_logical_operations.h @@ -0,0 +1,15 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/ir/compute_value/compute.h" + +namespace Shader::IR::ComputeValue { + +void DoLogicalOr(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2); +void DoLogicalAnd(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2); +void DoLogicalXor(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2); +void DoLogicalNot(ImmValueList& inst_values, const ImmValueList& arg1); + +} // namespace Shader::IR::ComputeValue \ No newline at end of file diff --git a/src/shader_recompiler/ir/compute_value/do_nop_functions.h b/src/shader_recompiler/ir/compute_value/do_nop_functions.h new file mode 100644 index 000000000..69acced68 --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/do_nop_functions.h @@ -0,0 +1,210 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +namespace Shader::IR::ComputeValue { + +#define NOP_FUNCTION(name) inline void Do##name(ImmValueList& inst_values) {} + +NOP_FUNCTION(Phi) +NOP_FUNCTION(Identity) +NOP_FUNCTION(Void) +NOP_FUNCTION(ConditionRef) +NOP_FUNCTION(Reference) +NOP_FUNCTION(PhiMove) + +NOP_FUNCTION(Prologue) +NOP_FUNCTION(Epilogue) +NOP_FUNCTION(Discard) +NOP_FUNCTION(DiscardCond) +NOP_FUNCTION(DebugPrint) + +NOP_FUNCTION(ReadConst) 
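+// The handlers in this file are intentionally empty: these opcodes read or mutate
+// runtime state (memory, registers, images), so constant evaluation yields no values.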
+NOP_FUNCTION(ReadConstBuffer) + +NOP_FUNCTION(Barrier) +NOP_FUNCTION(WorkgroupMemoryBarrier) +NOP_FUNCTION(DeviceMemoryBarrier) + +NOP_FUNCTION(EmitVertex) +NOP_FUNCTION(EmitPrimitive) + +NOP_FUNCTION(LoadSharedU32) +NOP_FUNCTION(LoadSharedU64) +NOP_FUNCTION(WriteSharedU32) +NOP_FUNCTION(WriteSharedU64) + +NOP_FUNCTION(SharedAtomicIAdd32) +NOP_FUNCTION(SharedAtomicSMin32) +NOP_FUNCTION(SharedAtomicUMin32) +NOP_FUNCTION(SharedAtomicSMax32) +NOP_FUNCTION(SharedAtomicUMax32) +NOP_FUNCTION(SharedAtomicAnd32) +NOP_FUNCTION(SharedAtomicOr32) +NOP_FUNCTION(SharedAtomicXor32) + +NOP_FUNCTION(GetUserData) +NOP_FUNCTION(GetThreadBitScalarReg) +NOP_FUNCTION(SetThreadBitScalarReg) +NOP_FUNCTION(GetScalarRegister) +NOP_FUNCTION(SetScalarRegister) +NOP_FUNCTION(GetVectorRegister) +NOP_FUNCTION(SetVectorRegister) +NOP_FUNCTION(GetGotoVariable) +NOP_FUNCTION(SetGotoVariable) +NOP_FUNCTION(GetAttribute) +NOP_FUNCTION(GetAttributeU32) +NOP_FUNCTION(SetAttribute) +NOP_FUNCTION(GetPatch) +NOP_FUNCTION(SetPatch) +NOP_FUNCTION(GetTessGenericAttribute) +NOP_FUNCTION(SetTcsGenericAttribute) +NOP_FUNCTION(ReadTcsGenericOuputAttribute) + +NOP_FUNCTION(GetScc) +NOP_FUNCTION(GetExec) +NOP_FUNCTION(GetVcc) +NOP_FUNCTION(GetVccLo) +NOP_FUNCTION(GetVccHi) +NOP_FUNCTION(GetM0) +NOP_FUNCTION(SetScc) +NOP_FUNCTION(SetExec) +NOP_FUNCTION(SetVcc) +NOP_FUNCTION(SetSccLo) +NOP_FUNCTION(SetVccLo) +NOP_FUNCTION(SetVccHi) +NOP_FUNCTION(SetM0) + +NOP_FUNCTION(UndefU1) +NOP_FUNCTION(UndefU8) +NOP_FUNCTION(UndefU16) +NOP_FUNCTION(UndefU32) +NOP_FUNCTION(UndefU64) + +NOP_FUNCTION(LoadBufferU8) +NOP_FUNCTION(LoadBufferU16) +NOP_FUNCTION(LoadBufferU32) +NOP_FUNCTION(LoadBufferU32x2) +NOP_FUNCTION(LoadBufferU32x3) +NOP_FUNCTION(LoadBufferU32x4) +NOP_FUNCTION(LoadBufferF32) +NOP_FUNCTION(LoadBufferF32x2) +NOP_FUNCTION(LoadBufferF32x3) +NOP_FUNCTION(LoadBufferF32x4) +NOP_FUNCTION(LoadBufferFormatF32) +NOP_FUNCTION(StoreBufferU8) +NOP_FUNCTION(StoreBufferU16) +NOP_FUNCTION(StoreBufferU32) +NOP_FUNCTION(StoreBufferU32x2) +NOP_FUNCTION(StoreBufferU32x3) +NOP_FUNCTION(StoreBufferU32x4) +NOP_FUNCTION(StoreBufferF32) +NOP_FUNCTION(StoreBufferF32x2) +NOP_FUNCTION(StoreBufferF32x3) +NOP_FUNCTION(StoreBufferF32x4) +NOP_FUNCTION(StoreBufferFormatF32) + +NOP_FUNCTION(BufferAtomicIAdd32) +NOP_FUNCTION(BufferAtomicSMin32) +NOP_FUNCTION(BufferAtomicUMin32) +NOP_FUNCTION(BufferAtomicSMax32) +NOP_FUNCTION(BufferAtomicUMax32) +NOP_FUNCTION(BufferAtomicInc32) +NOP_FUNCTION(BufferAtomicDec32) +NOP_FUNCTION(BufferAtomicAnd32) +NOP_FUNCTION(BufferAtomicOr32) +NOP_FUNCTION(BufferAtomicXor32) +NOP_FUNCTION(BufferAtomicSwap32) + +// Select instructions are handled separately +NOP_FUNCTION(SelectU1) +NOP_FUNCTION(SelectU8) +NOP_FUNCTION(SelectU16) +NOP_FUNCTION(SelectU32) +NOP_FUNCTION(SelectU64) +NOP_FUNCTION(SelectF32) +NOP_FUNCTION(SelectF64) + +NOP_FUNCTION(FPOrdEqual32) +NOP_FUNCTION(FPOrdEqual64) +NOP_FUNCTION(FPUnordEqual32) +NOP_FUNCTION(FPUnordEqual64) +NOP_FUNCTION(FPOrdNotEqual32) +NOP_FUNCTION(FPOrdNotEqual64) +NOP_FUNCTION(FPUnordNotEqual32) +NOP_FUNCTION(FPUnordNotEqual64) +NOP_FUNCTION(FPOrdLessThan32) +NOP_FUNCTION(FPOrdLessThan64) +NOP_FUNCTION(FPUnordLessThan32) +NOP_FUNCTION(FPUnordLessThan64) +NOP_FUNCTION(FPOrdGreaterThan32) +NOP_FUNCTION(FPOrdGreaterThan64) +NOP_FUNCTION(FPUnordGreaterThan32) +NOP_FUNCTION(FPUnordGreaterThan64) +NOP_FUNCTION(FPOrdLessThanEqual32) +NOP_FUNCTION(FPOrdLessThanEqual64) +NOP_FUNCTION(FPUnordLessThanEqual32) +NOP_FUNCTION(FPUnordLessThanEqual64) +NOP_FUNCTION(FPOrdGreaterThanEqual32) 
+NOP_FUNCTION(FPOrdGreaterThanEqual64) +NOP_FUNCTION(FPUnordGreaterThanEqual32) +NOP_FUNCTION(FPUnordGreaterThanEqual64) +NOP_FUNCTION(FPIsNan32) +NOP_FUNCTION(FPIsNan64) +NOP_FUNCTION(FPIsInf32) +NOP_FUNCTION(FPIsInf64) +NOP_FUNCTION(FPCmpClass32) + +NOP_FUNCTION(SLessThan32) +NOP_FUNCTION(SLessThan64) +NOP_FUNCTION(ULessThan32) +NOP_FUNCTION(ULessThan64) +NOP_FUNCTION(IEqual32) +NOP_FUNCTION(IEqual64) +NOP_FUNCTION(SLessThanEqual) +NOP_FUNCTION(ULessThanEqual) +NOP_FUNCTION(SGreaterThan) +NOP_FUNCTION(UGreaterThan) +NOP_FUNCTION(INotEqual32) +NOP_FUNCTION(INotEqual64) +NOP_FUNCTION(SGreaterThanEqual) +NOP_FUNCTION(UGreaterThanEqual) + +NOP_FUNCTION(ImageSampleRaw) +NOP_FUNCTION(ImageSampleImplicitLod) +NOP_FUNCTION(ImageSampleExplicitLod) +NOP_FUNCTION(ImageSampleDrefImplicitLod) +NOP_FUNCTION(ImageSampleDrefExplicitLod) +NOP_FUNCTION(ImageGather) +NOP_FUNCTION(ImageGatherDref) +NOP_FUNCTION(ImageQueryDimensions) +NOP_FUNCTION(ImageQueryLod) +NOP_FUNCTION(ImageGradient) +NOP_FUNCTION(ImageRead) +NOP_FUNCTION(ImageWrite) + +NOP_FUNCTION(ImageAtomicIAdd32) +NOP_FUNCTION(ImageAtomicSMin32) +NOP_FUNCTION(ImageAtomicUMin32) +NOP_FUNCTION(ImageAtomicSMax32) +NOP_FUNCTION(ImageAtomicUMax32) +NOP_FUNCTION(ImageAtomicInc32) +NOP_FUNCTION(ImageAtomicDec32) +NOP_FUNCTION(ImageAtomicAnd32) +NOP_FUNCTION(ImageAtomicOr32) +NOP_FUNCTION(ImageAtomicXor32) +NOP_FUNCTION(ImageAtomicExchange32) + +NOP_FUNCTION(CubeFaceIndex) + +NOP_FUNCTION(LaneId) +NOP_FUNCTION(WarpId) +NOP_FUNCTION(QuadShuffle) +NOP_FUNCTION(ReadFirstLane) +NOP_FUNCTION(ReadLane) +NOP_FUNCTION(WriteLane) +NOP_FUNCTION(DataAppend) +NOP_FUNCTION(DataConsume) + +#undef NOP_FUNCTION + +} // namespace Shader::IR::ComputeValue diff --git a/src/shader_recompiler/ir/compute_value/do_packing.cpp b/src/shader_recompiler/ir/compute_value/do_packing.cpp new file mode 100644 index 000000000..9d5169f43 --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/do_packing.cpp @@ -0,0 +1,132 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/compute_value/do_packing.h" + +namespace Shader::IR::ComputeValue { + +void DoPackUint2x32(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackUint2x32(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackFloat2x32(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackUnorm2x16(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackUnorm2x16(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackSnorm2x16(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackSnorm2x16(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackUint2x16(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackUint2x16(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackSint2x16(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackSint2x16(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackHalf2x16(ImmValueList& inst_values, const 
ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackHalf2x16(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackUnorm4x8(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackUnorm4x8(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackSnorm4x8(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackSnorm4x8(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackUint4x8(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackUint4x8(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackSint4x8(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackSint4x8(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackUfloat10_11_11(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackUfloat10_11_11(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackUnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackUnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackSnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackSnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackUint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackUint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoPackSint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +void DoUnpackSint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0) { + UNREACHABLE_MSG("Unimplemented"); +} + +} // namespace Shader::IR::ComputeValue diff --git a/src/shader_recompiler/ir/compute_value/do_packing.h b/src/shader_recompiler/ir/compute_value/do_packing.h new file mode 100644 index 000000000..9699894f4 --- /dev/null +++ b/src/shader_recompiler/ir/compute_value/do_packing.h @@ -0,0 +1,42 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/ir/compute_value/compute.h" + +namespace Shader::IR::ComputeValue { + +void DoPackUint2x32(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackUint2x32(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackFloat2x32(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackUnorm2x16(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackUnorm2x16(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackSnorm2x16(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackSnorm2x16(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackUint2x16(ImmValueList& inst_values, const ImmValueList& args0); +void 
DoUnpackUint2x16(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackSint2x16(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackSint2x16(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackHalf2x16(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackHalf2x16(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackUnorm4x8(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackUnorm4x8(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackSnorm4x8(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackSnorm4x8(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackUint4x8(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackUint4x8(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackSint4x8(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackSint4x8(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackUfloat10_11_11(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackUfloat10_11_11(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackUnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackUnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackSnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackSnorm2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackUint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackUint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0); +void DoPackSint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0); +void DoUnpackSint2_10_10_10(ImmValueList& inst_values, const ImmValueList& args0); + +} // namespace Shader::IR::ComputeValue diff --git a/src/shader_recompiler/ir/compute_value/imm_value.cpp b/src/shader_recompiler/ir/compute_value/imm_value.cpp index e94533e57..d92aed43c 100644 --- a/src/shader_recompiler/ir/compute_value/imm_value.cpp +++ b/src/shader_recompiler/ir/compute_value/imm_value.cpp @@ -4,11 +4,10 @@ #include "common/hash.h" #include "shader_recompiler/ir/compute_value/imm_value.h" -namespace Shader::IR { +namespace Shader::IR::ComputeValue { ImmValue::ImmValue(const IR::Value& value) noexcept { - type = value.Type(); - switch (type) { + switch (value.Type()) { case Type::U1: imm_values[0].imm_u1 = value.U1(); break; @@ -31,1423 +30,1196 @@ ImmValue::ImmValue(const IR::Value& value) noexcept { imm_values[0].imm_f64 = value.F64(); break; default: - UNREACHABLE_MSG("Invalid type {}", type); + UNREACHABLE_MSG("Invalid type {}", value.Type()); } } -ImmValue::ImmValue(bool value) noexcept : type{Type::U1}, is_signed{false} { +ImmValue::ImmValue(bool value) noexcept { imm_values[0].imm_u1 = value; } -ImmValue::ImmValue(u8 value) noexcept : type{Type::U8}, is_signed{false} { +ImmValue::ImmValue(u8 value) noexcept { imm_values[0].imm_u8 = value; } -ImmValue::ImmValue(s8 value) noexcept : type{Type::U8}, is_signed{true} { +ImmValue::ImmValue(s8 value) noexcept { imm_values[0].imm_s8 = value; } -ImmValue::ImmValue(u16 value) noexcept : type{Type::U16}, is_signed{false} { +ImmValue::ImmValue(u16 value) noexcept { imm_values[0].imm_u16 = value; } -ImmValue::ImmValue(s16 value) noexcept : type{Type::U16}, is_signed{true} { +ImmValue::ImmValue(s16 value) noexcept { imm_values[0].imm_s16 = value; } -ImmValue::ImmValue(u32 value) noexcept : type{Type::U32}, is_signed{false} { 
+ImmValue::ImmValue(u32 value) noexcept { imm_values[0].imm_u32 = value; } -ImmValue::ImmValue(s32 value) noexcept : type{Type::U32}, is_signed{true} { +ImmValue::ImmValue(s32 value) noexcept { imm_values[0].imm_s32 = value; } -ImmValue::ImmValue(f32 value) noexcept : type{Type::F32}, is_signed{true} { +ImmValue::ImmValue(f32 value) noexcept { imm_values[0].imm_f32 = value; } -ImmValue::ImmValue(u64 value) noexcept : type{Type::U64}, is_signed{false} { +ImmValue::ImmValue(u64 value) noexcept { imm_values[0].imm_u64 = value; } -ImmValue::ImmValue(s64 value) noexcept : type{Type::U64}, is_signed{true} { +ImmValue::ImmValue(s64 value) noexcept { imm_values[0].imm_s64 = value; } -ImmValue::ImmValue(f64 value) noexcept : type{Type::F64}, is_signed{true} { +ImmValue::ImmValue(f64 value) noexcept { imm_values[0].imm_f64 = value; } -ImmValue::ImmValue(u32 value1, u32 value2) noexcept : type{Type::U32x2}, is_signed{false} { +ImmValue::ImmValue(u32 value1, u32 value2) noexcept { imm_values[0].imm_u32 = value1; imm_values[1].imm_u32 = value2; } -ImmValue::ImmValue(u32 value1, u32 value2, u32 value3) noexcept - : type{Type::U32x3}, is_signed{false} { +ImmValue::ImmValue(u32 value1, u32 value2, u32 value3) noexcept { imm_values[0].imm_u32 = value1; imm_values[1].imm_u32 = value2; imm_values[2].imm_u32 = value3; } -ImmValue::ImmValue(u32 value1, u32 value2, u32 value3, u32 value4) noexcept - : type{Type::U32x4}, is_signed{false} { +ImmValue::ImmValue(u32 value1, u32 value2, u32 value3, u32 value4) noexcept { imm_values[0].imm_u32 = value1; imm_values[1].imm_u32 = value2; imm_values[2].imm_u32 = value3; imm_values[3].imm_u32 = value4; } -ImmValue::ImmValue(s32 value1, s32 value2) noexcept : type{Type::U32x2}, is_signed{true} { +ImmValue::ImmValue(s32 value1, s32 value2) noexcept { imm_values[0].imm_s32 = value1; imm_values[1].imm_s32 = value2; } -ImmValue::ImmValue(s32 value1, s32 value2, s32 value3) noexcept - : type{Type::U32x3}, is_signed{true} { +ImmValue::ImmValue(s32 value1, s32 value2, s32 value3) noexcept { imm_values[0].imm_s32 = value1; imm_values[1].imm_s32 = value2; imm_values[2].imm_s32 = value3; } -ImmValue::ImmValue(s32 value1, s32 value2, s32 value3, s32 value4) noexcept - : type{Type::U32x4}, is_signed{true} { +ImmValue::ImmValue(s32 value1, s32 value2, s32 value3, s32 value4) noexcept { imm_values[0].imm_s32 = value1; imm_values[1].imm_s32 = value2; imm_values[2].imm_s32 = value3; imm_values[3].imm_s32 = value4; } -ImmValue::ImmValue(f32 value1, f32 value2) noexcept : type{Type::F32x2}, is_signed{true} { +ImmValue::ImmValue(f32 value1, f32 value2) noexcept { imm_values[0].imm_f32 = value1; imm_values[1].imm_f32 = value2; } -ImmValue::ImmValue(f32 value1, f32 value2, f32 value3) noexcept - : type{Type::F32x3}, is_signed{true} { +ImmValue::ImmValue(f32 value1, f32 value2, f32 value3) noexcept { imm_values[0].imm_f32 = value1; imm_values[1].imm_f32 = value2; imm_values[2].imm_f32 = value3; } -ImmValue::ImmValue(f32 value1, f32 value2, f32 value3, f32 value4) noexcept - : type{Type::F32x4}, is_signed{true} { +ImmValue::ImmValue(f32 value1, f32 value2, f32 value3, f32 value4) noexcept { imm_values[0].imm_f32 = value1; imm_values[1].imm_f32 = value2; imm_values[2].imm_f32 = value3; imm_values[3].imm_f32 = value4; } -ImmValue::ImmValue(f64 value1, f64 value2) noexcept : type{Type::F64x2}, is_signed{true} { +ImmValue::ImmValue(u64 value1, u64 value2) noexcept { + imm_values[0].imm_u64 = value1; + imm_values[1].imm_u64 = value2; +} + +ImmValue::ImmValue(u64 value1, u64 value2, u64 value3) 
noexcept { + imm_values[0].imm_u64 = value1; + imm_values[1].imm_u64 = value2; + imm_values[2].imm_u64 = value3; +} + +ImmValue::ImmValue(u64 value1, u64 value2, u64 value3, u64 value4) noexcept { + imm_values[0].imm_u64 = value1; + imm_values[1].imm_u64 = value2; + imm_values[2].imm_u64 = value3; + imm_values[3].imm_u64 = value4; +} + +ImmValue::ImmValue(s64 value1, s64 value2) noexcept { + imm_values[0].imm_s64 = value1; + imm_values[1].imm_s64 = value2; +} + +ImmValue::ImmValue(s64 value1, s64 value2, s64 value3) noexcept { + imm_values[0].imm_s64 = value1; + imm_values[1].imm_s64 = value2; + imm_values[2].imm_s64 = value3; +} + +ImmValue::ImmValue(s64 value1, s64 value2, s64 value3, s64 value4) noexcept { + imm_values[0].imm_s64 = value1; + imm_values[1].imm_s64 = value2; + imm_values[2].imm_s64 = value3; + imm_values[3].imm_s64 = value4; +} + +ImmValue::ImmValue(f64 value1, f64 value2) noexcept { imm_values[0].imm_f64 = value1; imm_values[1].imm_f64 = value2; } -ImmValue::ImmValue(f64 value1, f64 value2, f64 value3) noexcept - : type{Type::F64x3}, is_signed{true} { +ImmValue::ImmValue(f64 value1, f64 value2, f64 value3) noexcept { imm_values[0].imm_f64 = value1; imm_values[1].imm_f64 = value2; imm_values[2].imm_f64 = value3; } -ImmValue::ImmValue(f64 value1, f64 value2, f64 value3, f64 value4) noexcept - : type{Type::F64x4}, is_signed{true} { +ImmValue::ImmValue(f64 value1, f64 value2, f64 value3, f64 value4) noexcept { imm_values[0].imm_f64 = value1; imm_values[1].imm_f64 = value2; imm_values[2].imm_f64 = value3; imm_values[3].imm_f64 = value4; } -ImmValue::ImmValue(const ImmValue& value1, const ImmValue& value2) noexcept - : type{value1.type}, is_signed{value1.is_signed} { - ASSERT(value1.type == value2.type && value1.is_signed == value2.is_signed); - switch (value1.Dimensions()) { - case 1: - imm_values[0] = value1.imm_values[0]; - imm_values[1] = value2.imm_values[0]; - break; - case 2: - imm_values[0] = value1.imm_values[0]; - imm_values[1] = value1.imm_values[1]; - imm_values[2] = value2.imm_values[0]; - imm_values[3] = value2.imm_values[1]; - break; - default: - UNREACHABLE_MSG("Invalid dimensions {}", value1.Dimensions()); - } +ImmValue::ImmValue(const ImmValue& value1, const ImmValue& value2) noexcept { + imm_values[0] = value1.imm_values[0]; + imm_values[1] = value2.imm_values[0]; } -ImmValue::ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3) noexcept - : type{value1.type}, is_signed{value1.is_signed} { - ASSERT(value1.type == value2.type && value1.type == value3.type && - value1.is_signed == value2.is_signed && value1.is_signed == value3.is_signed && - value1.Dimensions() == 1); +ImmValue::ImmValue(const ImmValue& value1, const ImmValue& value2, + const ImmValue& value3) noexcept { imm_values[0] = value1.imm_values[0]; imm_values[1] = value2.imm_values[0]; imm_values[2] = value3.imm_values[0]; } ImmValue::ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3, - const ImmValue& value4) noexcept - : type{value1.type}, is_signed{value1.is_signed} { - ASSERT(value1.type == value2.type && value1.type == value3.type && value1.type == value4.type && - value1.is_signed == value2.is_signed && value1.is_signed == value3.is_signed && - value1.is_signed == value4.is_signed && value1.Dimensions() == 1); + const ImmValue& value4) noexcept { imm_values[0] = value1.imm_values[0]; imm_values[1] = value2.imm_values[0]; imm_values[2] = value3.imm_values[0]; imm_values[3] = value4.imm_values[0]; } -IR::Type ImmValue::BaseType() const 
noexcept { - switch (type) { - case Type::U1: - return Type::U1; - case Type::U8: - return Type::U8; - case Type::U16: - return Type::U16; - case Type::U32: - case Type::U32x2: - case Type::U32x3: - case Type::U32x4: - return Type::U32; - case Type::U64: - return Type::U64; - case Type::F32: - case Type::F32x2: - case Type::F32x3: - case Type::F32x4: - return Type::F32; - case Type::F64: - case Type::F64x2: - case Type::F64x3: - case Type::F64x4: - return Type::F64; - default: - UNREACHABLE_MSG("Invalid type {}", type); - } -} - -u32 ImmValue::Dimensions() const noexcept { - switch (type) { - case Type::U1: - case Type::U8: - case Type::U16: - case Type::U32: - case Type::U64: - case Type::F32: - case Type::F64: - return 1; - case Type::U32x2: - case Type::F32x2: - case Type::F64x2: - return 2; - case Type::U32x3: - case Type::F32x3: - case Type::F64x3: - return 3; - case Type::U32x4: - case Type::F32x4: - case Type::F64x4: - return 4; - default: - UNREACHABLE_MSG("Invalid type {}", type); - } -} - -bool ImmValue::IsSigned() const noexcept { - return is_signed; -} - -void ImmValue::SetSigned(bool signed_) noexcept { - is_signed = signed_; -} - -void ImmValue::SameSignAs(const ImmValue& other) noexcept { - SetSigned(other.IsSigned()); -} - -ImmValue ImmValue::Convert(IR::Type new_type, bool new_signed) const noexcept { - switch (new_type) { - case Type::U16: { - switch (type) { - case Type::U32: - return ImmValue(static_cast<u16>(imm_values[0].imm_u32)); - default: - break; - } - break; - } - case Type::U32: { - if (new_signed) { - switch (type) { - case Type::F32: - return ImmValue(static_cast<s32>(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(static_cast<s32>(imm_values[0].imm_f64)); - default: - break; - } - } else { - switch (type) { - case Type::U16: - return ImmValue(static_cast<u32>(imm_values[0].imm_u16)); - case Type::U32: - if (is_signed) { - return ImmValue(static_cast<u32>(imm_values[0].imm_s32)); - } - break; - case Type::F32: - return ImmValue(static_cast<u32>(imm_values[0].imm_f32)); - default: - break; - } - } - } - case Type::F32: { - switch (type) { - case Type::U16: - return ImmValue(static_cast<f32>(imm_values[0].imm_u16)); - case Type::U32: - if (is_signed) { - return ImmValue(static_cast<f32>(imm_values[0].imm_s32)); - } else { - return ImmValue(static_cast<f32>(imm_values[0].imm_u32)); - } - case Type::F64: - return ImmValue(static_cast<f32>(imm_values[0].imm_f64)); - default: - break; - } - break; - } - case Type::F64: { - switch (type) { - case Type::F32: - return ImmValue(static_cast<f64>(imm_values[0].imm_f32)); - default: - break; - } - break; - } - default: - break; - } - UNREACHABLE_MSG("Invalid conversion from {} {} to {} {}", is_signed ? "signed" : "unsigned", - type, new_signed ? 
"signed" : "unsigned", new_type); -} - -ImmValue ImmValue::Bitcast(IR::Type new_type, bool new_signed) const noexcept { +ImmValue ImmValue::CompositeFrom2x2(const ImmValue& value1, const ImmValue& value2) noexcept { ImmValue result; - result.type = new_type; - result.is_signed = new_signed; - result.imm_values = imm_values; - ASSERT(Dimensions() == result.Dimensions()); - return result; -} - -ImmValue ImmValue::Extract(const ImmU32& index) const noexcept { - ASSERT(index.imm_values[0].imm_u32 < Dimensions()); - ImmValue result; - result.type = BaseType(); - result.is_signed = IsSigned(); - result.imm_values[0] = imm_values[index.imm_values[0].imm_u32]; - return result; -} - -ImmValue ImmValue::Insert(const ImmValue& value, const ImmU32& index) const noexcept { - ASSERT(index.imm_values[0].imm_u32 < Dimensions()); - ASSERT(value.type == BaseType() && value.IsSigned() == IsSigned()); - ImmValue result = *this; - result.imm_values[index.imm_values[0].imm_u32] = value.imm_values[0]; + result.imm_values[0] = value1.imm_values[0]; + result.imm_values[1] = value1.imm_values[1]; + result.imm_values[2] = value2.imm_values[0]; + result.imm_values[3] = value2.imm_values[1]; return result; } bool ImmValue::operator==(const ImmValue& other) const noexcept { - if (type != other.type) { - return false; - } - switch (type) { - case Type::U1: - return imm_values[0].imm_u1 == other.imm_values[0].imm_u1; - case Type::U8: - return imm_values[0].imm_u8 == other.imm_values[0].imm_u8; - case Type::U16: - return imm_values[0].imm_u16 == other.imm_values[0].imm_u16; - case Type::U32: - case Type::F32: - return imm_values[0].imm_u32 == other.imm_values[0].imm_u32; - case Type::U64: - case Type::F64: - return imm_values[0].imm_u64 == other.imm_values[0].imm_u64; - case Type::U32x2: - case Type::F32x2: - case Type::F64x2: - return imm_values[0].imm_u32 == other.imm_values[0].imm_u32 && - imm_values[1].imm_u32 == other.imm_values[1].imm_u32; - case Type::U32x3: - case Type::F32x3: - case Type::F64x3: - return imm_values[0].imm_u32 == other.imm_values[0].imm_u32 && - imm_values[1].imm_u32 == other.imm_values[1].imm_u32 && - imm_values[2].imm_u32 == other.imm_values[2].imm_u32; - case Type::U32x4: - case Type::F32x4: - case Type::F64x4: - return imm_values[0].imm_u32 == other.imm_values[0].imm_u32 && - imm_values[1].imm_u32 == other.imm_values[1].imm_u32 && - imm_values[2].imm_u32 == other.imm_values[2].imm_u32 && - imm_values[3].imm_u32 == other.imm_values[3].imm_u32; - default: - UNREACHABLE_MSG("Invalid type {}", type); - } + return imm_values[0].imm_u64 == other.imm_values[0].imm_u64 && + imm_values[1].imm_u64 == other.imm_values[1].imm_u64 && + imm_values[2].imm_u64 == other.imm_values[2].imm_u64 && + imm_values[3].imm_u64 == other.imm_values[3].imm_u64; } bool ImmValue::operator!=(const ImmValue& other) const noexcept { return !operator==(other); } -bool ImmValue::operator<(const ImmValue& other) const noexcept { - ASSERT(type == other.type); - switch (type) { - case Type::U8: - return is_signed && other.is_signed ? imm_values[0].imm_s8 < other.imm_values[0].imm_s8 - : imm_values[0].imm_u8 < other.imm_values[0].imm_u8; - case Type::U16: - return is_signed && other.is_signed ? imm_values[0].imm_s16 < other.imm_values[0].imm_s16 - : imm_values[0].imm_u16 < other.imm_values[0].imm_u16; - case Type::U32: - return is_signed && other.is_signed ? 
imm_values[0].imm_s32 < other.imm_values[0].imm_s32 - : imm_values[0].imm_u32 < other.imm_values[0].imm_u32; - case Type::F32: - return imm_values[0].imm_f32 < other.imm_values[0].imm_f32; - case Type::U64: - return is_signed && other.is_signed ? imm_values[0].imm_s64 < other.imm_values[0].imm_s64 - : imm_values[0].imm_u64 < other.imm_values[0].imm_u64; - case Type::F64: - return imm_values[0].imm_f64 < other.imm_values[0].imm_f64; - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +ImmValue ImmValue::Extract(const ImmValue& vec, const ImmValue& index) noexcept { + ImmValue result; + result.imm_values[0] = vec.imm_values[index.imm_values[0].imm_u32]; + return result; } -bool ImmValue::operator>(const ImmValue& other) const noexcept { - ASSERT(type == other.type); - switch (type) { - case Type::U8: - return is_signed && other.is_signed ? imm_values[0].imm_s8 > other.imm_values[0].imm_s8 - : imm_values[0].imm_u8 > other.imm_values[0].imm_u8; - case Type::U16: - return is_signed && other.is_signed ? imm_values[0].imm_s16 > other.imm_values[0].imm_s16 - : imm_values[0].imm_u16 > other.imm_values[0].imm_u16; - case Type::U32: - return is_signed && other.is_signed ? imm_values[0].imm_s32 > other.imm_values[0].imm_s32 - : imm_values[0].imm_u32 > other.imm_values[0].imm_u32; - case Type::F32: - return imm_values[0].imm_f32 > other.imm_values[0].imm_f32; - case Type::U64: - return is_signed && other.is_signed ? imm_values[0].imm_s64 > other.imm_values[0].imm_s64 - : imm_values[0].imm_u64 > other.imm_values[0].imm_u64; - case Type::F64: - return imm_values[0].imm_f64 > other.imm_values[0].imm_f64; - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +ImmValue ImmValue::Insert(const ImmValue& vec, const ImmValue& value, + const ImmValue& index) noexcept { + ImmValue result = vec; + result.imm_values[index.imm_values[0].imm_u32] = value.imm_values[0]; + return result; } -bool ImmValue::operator<=(const ImmValue& other) const noexcept { - return !operator>(other); +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_u32)); } -bool ImmValue::operator>=(const ImmValue& other) const noexcept { - return !operator<(other); +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_u16)); } -ImmValue ImmValue::operator+(const ImmValue& other) const noexcept { - ASSERT(type == other.type); - switch (type) { - case Type::U8: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s8 + other.imm_values[0].imm_s8) - : ImmValue(imm_values[0].imm_u8 + other.imm_values[0].imm_u8); - case Type::U16: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s16 + other.imm_values[0].imm_s16) - : ImmValue(imm_values[0].imm_u16 + other.imm_values[0].imm_u16); - case Type::U32: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 + other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 + other.imm_values[0].imm_u32); - case Type::F32: - return ImmValue(imm_values[0].imm_f32 + other.imm_values[0].imm_f32); - case Type::U32x2: - return is_signed && other.is_signed - ? 
ImmValue(imm_values[0].imm_s32 + other.imm_values[0].imm_s32, - imm_values[1].imm_s32 + other.imm_values[1].imm_s32) - : ImmValue(imm_values[0].imm_u32 + other.imm_values[0].imm_u32, - imm_values[1].imm_u32 + other.imm_values[1].imm_u32); - case Type::F32x2: - return ImmValue(imm_values[0].imm_f32 + other.imm_values[0].imm_f32, - imm_values[1].imm_f32 + other.imm_values[1].imm_f32); - case Type::U32x3: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 + other.imm_values[0].imm_s32, - imm_values[1].imm_s32 + other.imm_values[1].imm_s32, - imm_values[2].imm_s32 + other.imm_values[2].imm_s32) - : ImmValue(imm_values[0].imm_u32 + other.imm_values[0].imm_u32, - imm_values[1].imm_u32 + other.imm_values[1].imm_u32, - imm_values[2].imm_u32 + other.imm_values[2].imm_u32); - case Type::F32x3: - return ImmValue(imm_values[0].imm_f32 + other.imm_values[0].imm_f32, - imm_values[1].imm_f32 + other.imm_values[1].imm_f32, - imm_values[2].imm_f32 + other.imm_values[2].imm_f32); - case Type::U32x4: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 + other.imm_values[0].imm_s32, - imm_values[1].imm_s32 + other.imm_values[1].imm_s32, - imm_values[2].imm_s32 + other.imm_values[2].imm_s32, - imm_values[3].imm_s32 + other.imm_values[3].imm_s32) - : ImmValue(imm_values[0].imm_u32 + other.imm_values[0].imm_u32, - imm_values[1].imm_u32 + other.imm_values[1].imm_u32, - imm_values[2].imm_u32 + other.imm_values[2].imm_u32, - imm_values[3].imm_u32 + other.imm_values[3].imm_u32); - case Type::F32x4: - return ImmValue(imm_values[0].imm_f32 + other.imm_values[0].imm_f32, - imm_values[1].imm_f32 + other.imm_values[1].imm_f32, - imm_values[2].imm_f32 + other.imm_values[2].imm_f32, - imm_values[3].imm_f32 + other.imm_values[3].imm_f32); - case Type::U64: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s64 + other.imm_values[0].imm_s64) - : ImmValue(imm_values[0].imm_u64 + other.imm_values[0].imm_u64); - case Type::F64: - return ImmValue(imm_values[0].imm_f64 + other.imm_values[0].imm_f64); - case Type::F64x2: - return ImmValue(imm_values[0].imm_f64 + other.imm_values[0].imm_f64, - imm_values[1].imm_f64 + other.imm_values[1].imm_f64); - case Type::F64x3: - return ImmValue(imm_values[0].imm_f64 + other.imm_values[0].imm_f64, - imm_values[1].imm_f64 + other.imm_values[1].imm_f64, - imm_values[2].imm_f64 + other.imm_values[2].imm_f64); - case Type::F64x4: - return ImmValue(imm_values[0].imm_f64 + other.imm_values[0].imm_f64, - imm_values[1].imm_f64 + other.imm_values[1].imm_f64, - imm_values[2].imm_f64 + other.imm_values[2].imm_f64, - imm_values[3].imm_f64 + other.imm_values[3].imm_f64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_s32)); } -ImmValue ImmValue::operator-(const ImmValue& other) const noexcept { - ASSERT(type == other.type); - switch (type) { - case Type::U8: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s8 - other.imm_values[0].imm_s8) - : ImmValue(imm_values[0].imm_u8 - other.imm_values[0].imm_u8); - case Type::U16: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s16 - other.imm_values[0].imm_s16) - : ImmValue(imm_values[0].imm_u16 - other.imm_values[0].imm_u16); - case Type::U32: - return is_signed && other.is_signed - ? 
ImmValue(imm_values[0].imm_s32 - other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 - other.imm_values[0].imm_u32); - case Type::F32: - return ImmValue(imm_values[0].imm_f32 - other.imm_values[0].imm_f32); - case Type::U32x2: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 - other.imm_values[0].imm_s32, - imm_values[1].imm_s32 - other.imm_values[1].imm_s32) - : ImmValue(imm_values[0].imm_u32 - other.imm_values[0].imm_u32, - imm_values[1].imm_u32 - other.imm_values[1].imm_u32); - case Type::F32x2: - return ImmValue(imm_values[0].imm_f32 - other.imm_values[0].imm_f32, - imm_values[1].imm_f32 - other.imm_values[1].imm_f32); - case Type::U32x3: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 - other.imm_values[0].imm_s32, - imm_values[1].imm_s32 - other.imm_values[1].imm_s32, - imm_values[2].imm_s32 - other.imm_values[2].imm_s32) - : ImmValue(imm_values[0].imm_u32 - other.imm_values[0].imm_u32, - imm_values[1].imm_u32 - other.imm_values[1].imm_u32, - imm_values[2].imm_u32 - other.imm_values[2].imm_u32); - case Type::F32x3: - return ImmValue(imm_values[0].imm_f32 - other.imm_values[0].imm_f32, - imm_values[1].imm_f32 - other.imm_values[1].imm_f32, - imm_values[2].imm_f32 - other.imm_values[2].imm_f32); - case Type::U32x4: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 - other.imm_values[0].imm_s32, - imm_values[1].imm_s32 - other.imm_values[1].imm_s32, - imm_values[2].imm_s32 - other.imm_values[2].imm_s32, - imm_values[3].imm_s32 - other.imm_values[3].imm_s32) - : ImmValue(imm_values[0].imm_u32 - other.imm_values[0].imm_u32, - imm_values[1].imm_u32 - other.imm_values[1].imm_u32, - imm_values[2].imm_u32 - other.imm_values[2].imm_u32, - imm_values[3].imm_u32 - other.imm_values[3].imm_u32); - case Type::F32x4: - return ImmValue(imm_values[0].imm_f32 - other.imm_values[0].imm_f32, - imm_values[1].imm_f32 - other.imm_values[1].imm_f32, - imm_values[2].imm_f32 - other.imm_values[2].imm_f32, - imm_values[3].imm_f32 - other.imm_values[3].imm_f32); - case Type::U64: - return is_signed && other.is_signed - ? 
ImmValue(imm_values[0].imm_s64 - other.imm_values[0].imm_s64) - : ImmValue(imm_values[0].imm_u64 - other.imm_values[0].imm_u64); - case Type::F64: - return ImmValue(imm_values[0].imm_f64 - other.imm_values[0].imm_f64); - case Type::F64x2: - return ImmValue(imm_values[0].imm_f64 - other.imm_values[0].imm_f64, - imm_values[1].imm_f64 - other.imm_values[1].imm_f64); - case Type::F64x3: - return ImmValue(imm_values[0].imm_f64 - other.imm_values[0].imm_f64, - imm_values[1].imm_f64 - other.imm_values[1].imm_f64, - imm_values[2].imm_f64 - other.imm_values[2].imm_f64); - case Type::F64x4: - return ImmValue(imm_values[0].imm_f64 - other.imm_values[0].imm_f64, - imm_values[1].imm_f64 - other.imm_values[1].imm_f64, - imm_values[2].imm_f64 - other.imm_values[2].imm_f64, - imm_values[3].imm_f64 - other.imm_values[3].imm_f64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_f32)); } -ImmValue ImmValue::operator*(const ImmValue& other) const noexcept { - ASSERT(BaseType() == other.BaseType()); - const ImmValue* vector; - const ImmValue* scalar; - if (Dimensions() == 1) { - scalar = this; - vector = &other; - } else if (other.Dimensions() == 1) { - scalar = &other; - vector = this; - } else { - UNREACHABLE_MSG("Unspecified behavior for vector * vector multiplication"); - } - switch (vector->type) { - case Type::U8: - return is_signed && scalar->is_signed - ? ImmValue(scalar->imm_values[0].imm_s8 * vector->imm_values[0].imm_s8) - : ImmValue(scalar->imm_values[0].imm_u8 * vector->imm_values[0].imm_u8); - case Type::U16: - return is_signed && scalar->is_signed - ? ImmValue(scalar->imm_values[0].imm_s16 * vector->imm_values[0].imm_s16) - : ImmValue(scalar->imm_values[0].imm_u16 * vector->imm_values[0].imm_u16); - case Type::U32: - return is_signed && scalar->is_signed - ? ImmValue(scalar->imm_values[0].imm_s32 * vector->imm_values[0].imm_s32) - : ImmValue(scalar->imm_values[0].imm_u32 * vector->imm_values[0].imm_u32); - case Type::F32: - return ImmValue(scalar->imm_values[0].imm_f32 * vector->imm_values[0].imm_f32); - case Type::U32x2: - return is_signed && scalar->is_signed - ? ImmValue(scalar->imm_values[0].imm_s32 * vector->imm_values[0].imm_s32, - scalar->imm_values[0].imm_s32 * vector->imm_values[1].imm_s32) - : ImmValue(scalar->imm_values[0].imm_u32 * vector->imm_values[0].imm_u32, - scalar->imm_values[0].imm_u32 * vector->imm_values[1].imm_u32); - case Type::F32x2: - return ImmValue(scalar->imm_values[0].imm_f32 * vector->imm_values[0].imm_f32, - scalar->imm_values[0].imm_f32 * vector->imm_values[1].imm_f32); - case Type::U32x3: - return is_signed && scalar->is_signed - ? ImmValue(scalar->imm_values[0].imm_s32 * vector->imm_values[0].imm_s32, - scalar->imm_values[0].imm_s32 * vector->imm_values[1].imm_s32, - scalar->imm_values[0].imm_s32 * vector->imm_values[2].imm_s32) - : ImmValue(scalar->imm_values[0].imm_u32 * vector->imm_values[0].imm_u32, - scalar->imm_values[0].imm_u32 * vector->imm_values[1].imm_u32, - scalar->imm_values[0].imm_u32 * vector->imm_values[2].imm_u32); - case Type::F32x3: - return ImmValue(scalar->imm_values[0].imm_f32 * vector->imm_values[0].imm_f32, - scalar->imm_values[0].imm_f32 * vector->imm_values[1].imm_f32, - scalar->imm_values[0].imm_f32 * vector->imm_values[2].imm_f32); - case Type::U32x4: - return is_signed && scalar->is_signed - ? 
ImmValue(scalar->imm_values[0].imm_s32 * vector->imm_values[0].imm_s32, - scalar->imm_values[0].imm_s32 * vector->imm_values[1].imm_s32, - scalar->imm_values[0].imm_s32 * vector->imm_values[2].imm_s32, - scalar->imm_values[0].imm_s32 * vector->imm_values[3].imm_s32) - : ImmValue(scalar->imm_values[0].imm_u32 * vector->imm_values[0].imm_u32, - scalar->imm_values[0].imm_u32 * vector->imm_values[1].imm_u32, - scalar->imm_values[0].imm_u32 * vector->imm_values[2].imm_u32, - scalar->imm_values[0].imm_u32 * vector->imm_values[3].imm_u32); - case Type::F32x4: - return ImmValue(scalar->imm_values[0].imm_f32 * vector->imm_values[0].imm_f32, - scalar->imm_values[0].imm_f32 * vector->imm_values[1].imm_f32, - scalar->imm_values[0].imm_f32 * vector->imm_values[2].imm_f32, - scalar->imm_values[0].imm_f32 * vector->imm_values[3].imm_f32); - case Type::U64: - return is_signed && scalar->is_signed - ? ImmValue(scalar->imm_values[0].imm_s64 * vector->imm_values[0].imm_s64) - : ImmValue(scalar->imm_values[0].imm_u64 * vector->imm_values[0].imm_u64); - case Type::F64: - return ImmValue(scalar->imm_values[0].imm_f64 * vector->imm_values[0].imm_f64); - case Type::F64x2: - return ImmValue(scalar->imm_values[0].imm_f64 * vector->imm_values[0].imm_f64, - scalar->imm_values[0].imm_f64 * vector->imm_values[1].imm_f64); - case Type::F64x3: - return ImmValue(scalar->imm_values[0].imm_f64 * vector->imm_values[0].imm_f64, - scalar->imm_values[0].imm_f64 * vector->imm_values[1].imm_f64, - scalar->imm_values[0].imm_f64 * vector->imm_values[2].imm_f64); - case Type::F64x4: - return ImmValue(scalar->imm_values[0].imm_f64 * vector->imm_values[0].imm_f64, - scalar->imm_values[0].imm_f64 * vector->imm_values[1].imm_f64, - scalar->imm_values[0].imm_f64 * vector->imm_values[2].imm_f64, - scalar->imm_values[0].imm_f64 * vector->imm_values[3].imm_f64); - default: - UNREACHABLE_MSG("Invalid type {}", vector->type); - } +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_f32)); } -ImmValue ImmValue::operator/(const ImmValue& other) const { - ASSERT(BaseType() == other.BaseType() && other.Dimensions() == 1); - switch (type) { - case Type::U8: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s8 / other.imm_values[0].imm_s8) - : ImmValue(imm_values[0].imm_u8 / other.imm_values[0].imm_u8); - case Type::U16: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s16 / other.imm_values[0].imm_s16) - : ImmValue(imm_values[0].imm_u16 / other.imm_values[0].imm_u16); - case Type::U32: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 / other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 / other.imm_values[0].imm_u32); - case Type::F32: - return ImmValue(imm_values[0].imm_f32 / other.imm_values[0].imm_f32); - case Type::U32x2: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 / other.imm_values[0].imm_s32, - imm_values[1].imm_s32 / other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 / other.imm_values[0].imm_u32, - imm_values[1].imm_u32 / other.imm_values[0].imm_u32); - case Type::F32x2: - return ImmValue(imm_values[0].imm_f32 / other.imm_values[0].imm_f32, - imm_values[1].imm_f32 / other.imm_values[0].imm_f32); - case Type::U32x3: - return is_signed && other.is_signed - ? 
ImmValue(imm_values[0].imm_s32 / other.imm_values[0].imm_s32, - imm_values[1].imm_s32 / other.imm_values[0].imm_s32, - imm_values[2].imm_s32 / other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 / other.imm_values[0].imm_u32, - imm_values[1].imm_u32 / other.imm_values[0].imm_u32, - imm_values[2].imm_u32 / other.imm_values[0].imm_u32); - case Type::F32x3: - return ImmValue(imm_values[0].imm_f32 / other.imm_values[0].imm_f32, - imm_values[1].imm_f32 / other.imm_values[0].imm_f32, - imm_values[2].imm_f32 / other.imm_values[0].imm_f32); - case Type::U32x4: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 / other.imm_values[0].imm_s32, - imm_values[1].imm_s32 / other.imm_values[0].imm_s32, - imm_values[2].imm_s32 / other.imm_values[0].imm_s32, - imm_values[3].imm_s32 / other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 / other.imm_values[0].imm_u32, - imm_values[1].imm_u32 / other.imm_values[0].imm_u32, - imm_values[2].imm_u32 / other.imm_values[0].imm_u32, - imm_values[3].imm_u32 / other.imm_values[0].imm_u32); - case Type::F32x4: - return ImmValue(imm_values[0].imm_f32 / other.imm_values[0].imm_f32, - imm_values[1].imm_f32 / other.imm_values[0].imm_f32, - imm_values[2].imm_f32 / other.imm_values[0].imm_f32, - imm_values[3].imm_f32 / other.imm_values[0].imm_f32); - case Type::U64: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s64 / other.imm_values[0].imm_s64) - : ImmValue(imm_values[0].imm_u64 / other.imm_values[0].imm_u64); - case Type::F64: - return ImmValue(imm_values[0].imm_f64 / other.imm_values[0].imm_f64); - case Type::F64x2: - return ImmValue(imm_values[0].imm_f64 / other.imm_values[0].imm_f64, - imm_values[1].imm_f64 / other.imm_values[0].imm_f64); - case Type::F64x3: - return ImmValue(imm_values[0].imm_f64 / other.imm_values[0].imm_f64, - imm_values[1].imm_f64 / other.imm_values[0].imm_f64, - imm_values[2].imm_f64 / other.imm_values[0].imm_f64); - case Type::F64x4: - return ImmValue(imm_values[0].imm_f64 / other.imm_values[0].imm_f64, - imm_values[1].imm_f64 / other.imm_values[0].imm_f64, - imm_values[2].imm_f64 / other.imm_values[0].imm_f64, - imm_values[3].imm_f64 / other.imm_values[0].imm_f64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_u16)); } -ImmValue ImmValue::operator%(const ImmValue& other) const noexcept { - ASSERT(type == other.type); - switch (type) { - case Type::U8: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s8 % other.imm_values[0].imm_s8) - : ImmValue(imm_values[0].imm_u8 % other.imm_values[0].imm_u8); - case Type::U16: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s16 % other.imm_values[0].imm_s16) - : ImmValue(imm_values[0].imm_u16 % other.imm_values[0].imm_u16); - case Type::U32: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 % other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 % other.imm_values[0].imm_u32); - case Type::U64: - return is_signed && other.is_signed - ? 
ImmValue(imm_values[0].imm_s64 % other.imm_values[0].imm_s64) - : ImmValue(imm_values[0].imm_u64 % other.imm_values[0].imm_u64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_u16)); } -ImmValue ImmValue::operator&(const ImmValue& other) const noexcept { - ASSERT(type == other.type); - switch (type) { - case Type::U1: - return ImmValue(imm_values[0].imm_u1 & other.imm_values[0].imm_u1); - case Type::U8: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s8 & other.imm_values[0].imm_s8) - : ImmValue(imm_values[0].imm_u8 & other.imm_values[0].imm_u8); - case Type::U16: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s16 & other.imm_values[0].imm_s16) - : ImmValue(imm_values[0].imm_u16 & other.imm_values[0].imm_u16); - case Type::U32: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 & other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 & other.imm_values[0].imm_u32); - case Type::U64: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s64 & other.imm_values[0].imm_s64) - : ImmValue(imm_values[0].imm_u64 & other.imm_values[0].imm_u64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_u32)); } -ImmValue ImmValue::operator|(const ImmValue& other) const noexcept { - ASSERT(type == other.type); - switch (type) { - case Type::U1: - return ImmValue(imm_values[0].imm_u1 | other.imm_values[0].imm_u1); - case Type::U8: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s8 | other.imm_values[0].imm_s8) - : ImmValue(imm_values[0].imm_u8 | other.imm_values[0].imm_u8); - case Type::U16: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s16 | other.imm_values[0].imm_s16) - : ImmValue(imm_values[0].imm_u16 | other.imm_values[0].imm_u16); - case Type::U32: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 | other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 | other.imm_values[0].imm_u32); - case Type::U64: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s64 | other.imm_values[0].imm_s64) - : ImmValue(imm_values[0].imm_u64 | other.imm_values[0].imm_u64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_u32)); } -ImmValue ImmValue::operator^(const ImmValue& other) const noexcept { - ASSERT(type == other.type); - switch (type) { - case Type::U1: - return ImmValue(imm_values[0].imm_u1 ^ other.imm_values[0].imm_u1); - case Type::U8: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s8 ^ other.imm_values[0].imm_s8) - : ImmValue(imm_values[0].imm_u8 ^ other.imm_values[0].imm_u8); - case Type::U16: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s16 ^ other.imm_values[0].imm_s16) - : ImmValue(imm_values[0].imm_u16 ^ other.imm_values[0].imm_u16); - case Type::U32: - return is_signed && other.is_signed - ? ImmValue(imm_values[0].imm_s32 ^ other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 ^ other.imm_values[0].imm_u32); - case Type::U64: - return is_signed && other.is_signed - ? 
ImmValue(imm_values[0].imm_s64 ^ other.imm_values[0].imm_s64) - : ImmValue(imm_values[0].imm_u64 ^ other.imm_values[0].imm_u64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_f64)); } -ImmValue ImmValue::operator<<(const ImmU32& other) const noexcept { - switch (type) { - case Type::U1: - return ImmValue(imm_values[0].imm_u1 << other.imm_values[0].imm_u1); - case Type::U8: - return is_signed ? ImmValue(imm_values[0].imm_s8 << other.imm_values[0].imm_s8) - : ImmValue(imm_values[0].imm_u8 << other.imm_values[0].imm_u8); - case Type::U16: - return is_signed ? ImmValue(imm_values[0].imm_s16 << other.imm_values[0].imm_s16) - : ImmValue(imm_values[0].imm_u16 << other.imm_values[0].imm_u16); - case Type::U32: - return is_signed ? ImmValue(imm_values[0].imm_s32 << other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 << other.imm_values[0].imm_u32); - case Type::U64: - return is_signed ? ImmValue(imm_values[0].imm_s64 << other.imm_values[0].imm_s64) - : ImmValue(imm_values[0].imm_u64 << other.imm_values[0].imm_u64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_u32)); } -ImmValue ImmValue::operator>>(const ImmU32& other) const noexcept { - switch (type) { - case Type::U1: - return ImmValue(imm_values[0].imm_u1 >> other.imm_values[0].imm_u1); - case Type::U8: - return is_signed ? ImmValue(imm_values[0].imm_s8 >> other.imm_values[0].imm_s8) - : ImmValue(imm_values[0].imm_u8 >> other.imm_values[0].imm_u8); - case Type::U16: - return is_signed ? ImmValue(imm_values[0].imm_s16 >> other.imm_values[0].imm_s16) - : ImmValue(imm_values[0].imm_u16 >> other.imm_values[0].imm_u16); - case Type::U32: - return is_signed ? ImmValue(imm_values[0].imm_s32 >> other.imm_values[0].imm_s32) - : ImmValue(imm_values[0].imm_u32 >> other.imm_values[0].imm_u32); - case Type::U64: - return is_signed ? ImmValue(imm_values[0].imm_s64 >> other.imm_values[0].imm_s64) - : ImmValue(imm_values[0].imm_u64 >> other.imm_values[0].imm_u64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_s32)); } -ImmValue ImmValue::operator~() const noexcept { - switch (type) { - case Type::U1: - return ImmValue(~imm_values[0].imm_u1); - case Type::U8: - return is_signed ? ImmValue(imm_values[0].imm_s8) : ImmValue(imm_values[0].imm_u8); - case Type::U16: - return is_signed ? ImmValue(imm_values[0].imm_s16) : ImmValue(imm_values[0].imm_u16); - case Type::U32: - return is_signed ? ImmValue(imm_values[0].imm_s32) : ImmValue(imm_values[0].imm_u32); - case Type::U64: - return is_signed ? ImmValue(imm_values[0].imm_s64) : ImmValue(imm_values[0].imm_u64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Convert(const ImmValue& in) noexcept { + return ImmValue(static_cast(in.imm_values[0].imm_f32)); } -ImmValue ImmValue::operator++(int) noexcept { - switch (type) { - case Type::U8: - return is_signed ? ImmValue(imm_values[0].imm_s8++) : ImmValue(imm_values[0].imm_u8++); - case Type::U16: - return is_signed ? ImmValue(imm_values[0].imm_s16++) : ImmValue(imm_values[0].imm_u16++); - case Type::U32: - return is_signed ? 
ImmValue(imm_values[0].imm_s32++) : ImmValue(imm_values[0].imm_u32++); - case Type::U64: - return is_signed ? ImmValue(imm_values[0].imm_s64++) : ImmValue(imm_values[0].imm_u64++); - case Type::F32: - return ImmValue(imm_values[0].imm_f32++); - case Type::F64: - return ImmValue(imm_values[0].imm_f64++); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Add(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_u8 + b.imm_values[0].imm_u8, + a.imm_values[1].imm_u8 + b.imm_values[1].imm_u8, + a.imm_values[2].imm_u8 + b.imm_values[2].imm_u8, + a.imm_values[3].imm_u8 + b.imm_values[3].imm_u8); } -ImmValue ImmValue::operator--(int) noexcept { - switch (type) { - case Type::U8: - return is_signed ? ImmValue(imm_values[0].imm_s8--) : ImmValue(imm_values[0].imm_u8--); - case Type::U16: - return is_signed ? ImmValue(imm_values[0].imm_s16--) : ImmValue(imm_values[0].imm_u16--); - case Type::U32: - return is_signed ? ImmValue(imm_values[0].imm_s32--) : ImmValue(imm_values[0].imm_u32--); - case Type::U64: - return is_signed ? ImmValue(imm_values[0].imm_s64--) : ImmValue(imm_values[0].imm_u64--); - case Type::F32: - return ImmValue(imm_values[0].imm_f32--); - case Type::F64: - return ImmValue(imm_values[0].imm_f64--); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Add(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_s8 + b.imm_values[0].imm_s8, + a.imm_values[1].imm_s8 + b.imm_values[1].imm_s8, + a.imm_values[2].imm_s8 + b.imm_values[2].imm_s8, + a.imm_values[3].imm_s8 + b.imm_values[3].imm_s8); } -ImmValue& ImmValue::operator++() noexcept { - switch (type) { - case Type::U8: - if (is_signed) { - imm_values[0].imm_s8++; - } else { - imm_values[0].imm_u8++; - } - break; - case Type::U16: - if (is_signed) { - imm_values[0].imm_s16++; - } else { - imm_values[0].imm_u16++; - } - break; - case Type::U32: - if (is_signed) { - imm_values[0].imm_s32++; - } else { - imm_values[0].imm_u32++; - } - break; - case Type::U64: - if (is_signed) { - imm_values[0].imm_s64++; - } else { - imm_values[0].imm_u64++; - } - break; - case Type::F32: - imm_values[0].imm_f32++; - break; - case Type::F64: - imm_values[0].imm_f64++; - break; - default: - UNREACHABLE_MSG("Invalid type {}", type); - } - return *this; +template <> +ImmValue ImmValue::Add(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_u16 + b.imm_values[0].imm_u16, + a.imm_values[1].imm_u16 + b.imm_values[1].imm_u16, + a.imm_values[2].imm_u16 + b.imm_values[2].imm_u16, + a.imm_values[3].imm_u16 + b.imm_values[3].imm_u16); } -ImmValue& ImmValue::operator--() noexcept { - switch (type) { - case Type::U8: - if (is_signed) { - imm_values[0].imm_s8--; - } else { - imm_values[0].imm_u8--; - } - break; - case Type::U16: - if (is_signed) { - imm_values[0].imm_s16--; - } else { - imm_values[0].imm_u16--; - } - break; - case Type::U32: - if (is_signed) { - imm_values[0].imm_s32--; - } else { - imm_values[0].imm_u32--; - } - break; - case Type::U64: - if (is_signed) { - imm_values[0].imm_s64--; - } else { - imm_values[0].imm_u64--; - } - break; - case Type::F32: - imm_values[0].imm_f32--; - break; - case Type::F64: - imm_values[0].imm_f64--; - break; - default: - UNREACHABLE_MSG("Invalid type {}", type); - } - return *this; +template <> +ImmValue ImmValue::Add(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_s16 + b.imm_values[0].imm_s16, + 
a.imm_values[1].imm_s16 + b.imm_values[1].imm_s16, + a.imm_values[2].imm_s16 + b.imm_values[2].imm_s16, + a.imm_values[3].imm_s16 + b.imm_values[3].imm_s16); } -ImmValue ImmValue::operator-() const noexcept { - switch (type) { - case Type::U8: - return is_signed ? ImmValue(-imm_values[0].imm_s8) : ImmValue(-imm_values[0].imm_u8); - case Type::U16: - return is_signed ? ImmValue(-imm_values[0].imm_s16) : ImmValue(-imm_values[0].imm_u16); - case Type::U32: - return is_signed ? ImmValue(-imm_values[0].imm_s32) : ImmValue(-imm_values[0].imm_u32); - case Type::U32x2: - return is_signed ? ImmValue(-imm_values[0].imm_s32, -imm_values[1].imm_s32) - : ImmValue(-imm_values[0].imm_u32, -imm_values[1].imm_u32); - case Type::U32x3: - return is_signed ? ImmValue(-imm_values[0].imm_s32, -imm_values[1].imm_s32, - -imm_values[2].imm_s32) - : ImmValue(-imm_values[0].imm_u32, -imm_values[1].imm_u32, - -imm_values[2].imm_u32); - case Type::U32x4: - return is_signed ? ImmValue(-imm_values[0].imm_s32, -imm_values[1].imm_s32, - -imm_values[2].imm_s32, -imm_values[3].imm_s32) - : ImmValue(-imm_values[0].imm_u32, -imm_values[1].imm_u32, - -imm_values[2].imm_u32, -imm_values[3].imm_u32); - case Type::U64: - return is_signed ? ImmValue(-imm_values[0].imm_s64) : ImmValue(-imm_values[0].imm_u64); - case Type::F32: - return ImmValue(-imm_values[0].imm_f32); - case Type::F32x2: - return ImmValue(-imm_values[0].imm_f32, -imm_values[1].imm_f32); - case Type::F32x3: - return ImmValue(-imm_values[0].imm_f32, -imm_values[1].imm_f32, -imm_values[2].imm_f32); - case Type::F32x4: - return ImmValue(-imm_values[0].imm_f32, -imm_values[1].imm_f32, -imm_values[2].imm_f32, - -imm_values[3].imm_f32); - case Type::F64: - return ImmValue(-imm_values[0].imm_f64); - case Type::F64x2: - return ImmValue(-imm_values[0].imm_f64, -imm_values[1].imm_f64); - case Type::F64x3: - return ImmValue(-imm_values[0].imm_f64, -imm_values[1].imm_f64, -imm_values[2].imm_f64); - case Type::F64x4: - return ImmValue(-imm_values[0].imm_f64, -imm_values[1].imm_f64, -imm_values[2].imm_f64, - -imm_values[3].imm_f64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Add(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_u32 + b.imm_values[0].imm_u32, + a.imm_values[1].imm_u32 + b.imm_values[1].imm_u32, + a.imm_values[2].imm_u32 + b.imm_values[2].imm_u32, + a.imm_values[3].imm_u32 + b.imm_values[3].imm_u32); } -ImmValue ImmValue::operator+() const noexcept { - return *this; +template <> +ImmValue ImmValue::Add(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_s32 + b.imm_values[0].imm_s32, + a.imm_values[1].imm_s32 + b.imm_values[1].imm_s32, + a.imm_values[2].imm_s32 + b.imm_values[2].imm_s32, + a.imm_values[3].imm_s32 + b.imm_values[3].imm_s32); } -// this is not the best way - -ImmValue& ImmValue::operator+=(const ImmValue& other) noexcept { - ImmValue result = *this + other; - *this = result; - return *this; +template <> +ImmValue ImmValue::Add(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_f32 + b.imm_values[0].imm_f32, + a.imm_values[1].imm_f32 + b.imm_values[1].imm_f32, + a.imm_values[2].imm_f32 + b.imm_values[2].imm_f32, + a.imm_values[3].imm_f32 + b.imm_values[3].imm_f32); } -ImmValue& ImmValue::operator-=(const ImmValue& other) noexcept { - ImmValue result = *this - other; - *this = result; - return *this; +template <> +ImmValue ImmValue::Add(const ImmValue& a, const ImmValue& b) noexcept { + return 
ImmValue(a.imm_values[0].imm_u64 + b.imm_values[0].imm_u64, + a.imm_values[1].imm_u64 + b.imm_values[1].imm_u64, + a.imm_values[2].imm_u64 + b.imm_values[2].imm_u64, + a.imm_values[3].imm_u64 + b.imm_values[3].imm_u64); } -ImmValue& ImmValue::operator*=(const ImmValue& other) noexcept { - ImmValue result = *this * other; - *this = result; - return *this; +template <> +ImmValue ImmValue::Add(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_s64 + b.imm_values[0].imm_s64, + a.imm_values[1].imm_s64 + b.imm_values[1].imm_s64, + a.imm_values[2].imm_s64 + b.imm_values[2].imm_s64, + a.imm_values[3].imm_s64 + b.imm_values[3].imm_s64); } -ImmValue& ImmValue::operator/=(const ImmValue& other) { - ImmValue result = *this / other; - *this = result; - return *this; +template <> +ImmValue ImmValue::Add(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_f64 + b.imm_values[0].imm_f64, + a.imm_values[1].imm_f64 + b.imm_values[1].imm_f64, + a.imm_values[2].imm_f64 + b.imm_values[2].imm_f64, + a.imm_values[3].imm_f64 + b.imm_values[3].imm_f64); } -ImmValue& ImmValue::operator%=(const ImmValue& other) noexcept { - ImmValue result = *this % other; - *this = result; - return *this; +template <> +ImmValue ImmValue::Sub(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_u8 - b.imm_values[0].imm_u8, + a.imm_values[1].imm_u8 - b.imm_values[1].imm_u8, + a.imm_values[2].imm_u8 - b.imm_values[2].imm_u8, + a.imm_values[3].imm_u8 - b.imm_values[3].imm_u8); } -ImmValue& ImmValue::operator&=(const ImmValue& other) noexcept { - ImmValue result = *this & other; - *this = result; - return *this; +template <> +ImmValue ImmValue::Sub(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_s8 - b.imm_values[0].imm_s8, + a.imm_values[1].imm_s8 - b.imm_values[1].imm_s8, + a.imm_values[2].imm_s8 - b.imm_values[2].imm_s8, + a.imm_values[3].imm_s8 - b.imm_values[3].imm_s8); } -ImmValue& ImmValue::operator|=(const ImmValue& other) noexcept { - ImmValue result = *this | other; - *this = result; - return *this; +template <> +ImmValue ImmValue::Sub(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_u16 - b.imm_values[0].imm_u16, + a.imm_values[1].imm_u16 - b.imm_values[1].imm_u16, + a.imm_values[2].imm_u16 - b.imm_values[2].imm_u16, + a.imm_values[3].imm_u16 - b.imm_values[3].imm_u16); } -ImmValue& ImmValue::operator^=(const ImmValue& other) noexcept { - ImmValue result = *this ^ other; - *this = result; - return *this; +template <> +ImmValue ImmValue::Sub(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_s16 - b.imm_values[0].imm_s16, + a.imm_values[1].imm_s16 - b.imm_values[1].imm_s16, + a.imm_values[2].imm_s16 - b.imm_values[2].imm_s16, + a.imm_values[3].imm_s16 - b.imm_values[3].imm_s16); } -ImmValue& ImmValue::operator<<=(const ImmU32& other) noexcept { - ImmValue result = *this << other; - *this = result; - return *this; +template <> +ImmValue ImmValue::Sub(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_u32 - b.imm_values[0].imm_u32, + a.imm_values[1].imm_u32 - b.imm_values[1].imm_u32, + a.imm_values[2].imm_u32 - b.imm_values[2].imm_u32, + a.imm_values[3].imm_u32 - b.imm_values[3].imm_u32); } -ImmValue& ImmValue::operator>>=(const ImmU32& other) noexcept { - ImmValue result = *this >> other; - *this = result; - return *this; +template <> +ImmValue ImmValue::Sub(const ImmValue& a, 
const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_s32 - b.imm_values[0].imm_s32, + a.imm_values[1].imm_s32 - b.imm_values[1].imm_s32, + a.imm_values[2].imm_s32 - b.imm_values[2].imm_s32, + a.imm_values[3].imm_s32 - b.imm_values[3].imm_s32); } -ImmValue ImmValue::abs() const noexcept { - switch (type) { - case Type::U8: - return is_signed ? ImmValue(std::abs(imm_values[0].imm_s8)) - : ImmValue(imm_values[0].imm_u8); - case Type::U16: - return is_signed ? ImmValue(std::abs(imm_values[0].imm_s16)) - : ImmValue(imm_values[0].imm_u16); - case Type::U32: - return is_signed ? ImmValue(std::abs(imm_values[0].imm_s32)) - : ImmValue(imm_values[0].imm_u32); - case Type::U64: - return is_signed ? ImmValue(std::abs(imm_values[0].imm_s64)) - : ImmValue(imm_values[0].imm_u64); - case Type::F32: - return ImmValue(std::abs(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(std::abs(imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Sub(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_f32 - b.imm_values[0].imm_f32, + a.imm_values[1].imm_f32 - b.imm_values[1].imm_f32, + a.imm_values[2].imm_f32 - b.imm_values[2].imm_f32, + a.imm_values[3].imm_f32 - b.imm_values[3].imm_f32); } -ImmValue ImmValue::recip() const noexcept { - switch (type) { - case Type::F32: - return ImmValue(1.0f / imm_values[0].imm_f32); - case Type::F64: - return ImmValue(1.0 / imm_values[0].imm_f64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Sub(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_u64 - b.imm_values[0].imm_u64, + a.imm_values[1].imm_u64 - b.imm_values[1].imm_u64, + a.imm_values[2].imm_u64 - b.imm_values[2].imm_u64, + a.imm_values[3].imm_u64 - b.imm_values[3].imm_u64); } -ImmValue ImmValue::sqrt() const noexcept { - switch (type) { - case Type::F32: - return ImmValue(std::sqrt(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(std::sqrt(imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Sub(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_s64 - b.imm_values[0].imm_s64, + a.imm_values[1].imm_s64 - b.imm_values[1].imm_s64, + a.imm_values[2].imm_s64 - b.imm_values[2].imm_s64, + a.imm_values[3].imm_s64 - b.imm_values[3].imm_s64); } -ImmValue ImmValue::rsqrt() const noexcept { - switch (type) { - case Type::F32: - return ImmValue(1.0f / std::sqrt(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(1.0 / std::sqrt(imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Sub(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_f64 - b.imm_values[0].imm_f64, + a.imm_values[1].imm_f64 - b.imm_values[1].imm_f64, + a.imm_values[2].imm_f64 - b.imm_values[2].imm_f64, + a.imm_values[3].imm_f64 - b.imm_values[3].imm_f64); } -ImmValue ImmValue::sin() const noexcept { - switch (type) { - case Type::F32: - return ImmValue(std::sin(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(std::sin(imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Mul(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_u8 * b.imm_values[0].imm_u8, + a.imm_values[1].imm_u8 * b.imm_values[0].imm_u8, + a.imm_values[2].imm_u8 * 
b.imm_values[0].imm_u8, + a.imm_values[3].imm_u8 * b.imm_values[0].imm_u8); } -ImmValue ImmValue::cos() const noexcept { - switch (type) { - case Type::F32: - return ImmValue(std::cos(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(std::cos(imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Mul(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_s8 * b.imm_values[0].imm_s8, + a.imm_values[1].imm_s8 * b.imm_values[0].imm_s8, + a.imm_values[2].imm_s8 * b.imm_values[0].imm_s8, + a.imm_values[3].imm_s8 * b.imm_values[0].imm_s8); } -ImmValue ImmValue::exp2() const noexcept { - switch (type) { - case Type::F32: - return ImmValue(std::exp2(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(std::exp2(imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Mul(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_u16 * b.imm_values[0].imm_u16, + a.imm_values[1].imm_u16 * b.imm_values[0].imm_u16, + a.imm_values[2].imm_u16 * b.imm_values[0].imm_u16, + a.imm_values[3].imm_u16 * b.imm_values[0].imm_u16); } -ImmValue ImmValue::ldexp(const ImmU32& exp) const noexcept { - switch (type) { - case Type::F32: - return ImmValue(std::ldexp(imm_values[0].imm_f32, exp.imm_values[0].imm_s32)); - case Type::F64: - return ImmValue(std::ldexp(imm_values[0].imm_f64, exp.imm_values[0].imm_s32)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Mul(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_s16 * b.imm_values[0].imm_s16, + a.imm_values[1].imm_s16 * b.imm_values[0].imm_s16, + a.imm_values[2].imm_s16 * b.imm_values[0].imm_s16, + a.imm_values[3].imm_s16 * b.imm_values[0].imm_s16); } -ImmValue ImmValue::log2() const noexcept { - switch (type) { - case Type::F32: - return ImmValue(std::log2(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(std::log2(imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Mul(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_u32 * b.imm_values[0].imm_u32, + a.imm_values[1].imm_u32 * b.imm_values[0].imm_u32, + a.imm_values[2].imm_u32 * b.imm_values[0].imm_u32, + a.imm_values[3].imm_u32 * b.imm_values[0].imm_u32); } -ImmValue ImmValue::clamp(const ImmValue& min, const ImmValue& max) const noexcept { - ASSERT(type == min.type && min.type == max.type); - switch (type) { - case Type::U8: - return is_signed && min.is_signed && max.is_signed - ? ImmValue(std::clamp(imm_values[0].imm_s8, min.imm_values[0].imm_s8, - max.imm_values[0].imm_s8)) - : ImmValue(std::clamp(imm_values[0].imm_u8, min.imm_values[0].imm_u8, - max.imm_values[0].imm_u8)); - case Type::U16: - return is_signed && min.is_signed && max.is_signed - ? ImmValue(std::clamp(imm_values[0].imm_s16, min.imm_values[0].imm_s16, - max.imm_values[0].imm_s16)) - : ImmValue(std::clamp(imm_values[0].imm_u16, min.imm_values[0].imm_u16, - max.imm_values[0].imm_u16)); - case Type::U32: - return is_signed && min.is_signed && max.is_signed - ? ImmValue(std::clamp(imm_values[0].imm_s32, min.imm_values[0].imm_s32, - max.imm_values[0].imm_s32)) - : ImmValue(std::clamp(imm_values[0].imm_u32, min.imm_values[0].imm_u32, - max.imm_values[0].imm_u32)); - case Type::U64: - return is_signed && min.is_signed && max.is_signed - ? 
ImmValue(std::clamp(imm_values[0].imm_s64, min.imm_values[0].imm_s64, - max.imm_values[0].imm_s64)) - : ImmValue(std::clamp(imm_values[0].imm_u64, min.imm_values[0].imm_u64, - max.imm_values[0].imm_u64)); - case Type::F32: - return ImmValue(std::clamp(imm_values[0].imm_f32, min.imm_values[0].imm_f32, - max.imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(std::clamp(imm_values[0].imm_f64, min.imm_values[0].imm_f64, - max.imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Mul(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_s32 * b.imm_values[0].imm_s32, + a.imm_values[1].imm_s32 * b.imm_values[0].imm_s32, + a.imm_values[2].imm_s32 * b.imm_values[0].imm_s32, + a.imm_values[3].imm_s32 * b.imm_values[0].imm_s32); } -ImmValue ImmValue::floor() const noexcept { - switch (type) { - case Type::F32: - return ImmValue(std::floor(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(std::floor(imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Mul(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_f32 * b.imm_values[0].imm_f32, + a.imm_values[1].imm_f32 * b.imm_values[0].imm_f32, + a.imm_values[2].imm_f32 * b.imm_values[0].imm_f32, + a.imm_values[3].imm_f32 * b.imm_values[0].imm_f32); } -ImmValue ImmValue::ceil() const noexcept { - switch (type) { - case Type::F32: - return ImmValue(std::ceil(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(std::ceil(imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Mul(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_u64 * b.imm_values[0].imm_u64, + a.imm_values[1].imm_u64 * b.imm_values[0].imm_u64, + a.imm_values[2].imm_u64 * b.imm_values[0].imm_u64, + a.imm_values[3].imm_u64 * b.imm_values[0].imm_u64); } -ImmValue ImmValue::round() const noexcept { - switch (type) { - case Type::F32: - return ImmValue(std::round(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(std::round(imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Mul(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_s64 * b.imm_values[0].imm_s64, + a.imm_values[1].imm_s64 * b.imm_values[0].imm_s64, + a.imm_values[2].imm_s64 * b.imm_values[0].imm_s64, + a.imm_values[3].imm_s64 * b.imm_values[0].imm_s64); } -ImmValue ImmValue::trunc() const noexcept { - switch (type) { - case Type::F32: - return ImmValue(std::trunc(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(std::trunc(imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Mul(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(a.imm_values[0].imm_f64 * b.imm_values[0].imm_f64, + a.imm_values[1].imm_f64 * b.imm_values[0].imm_f64, + a.imm_values[2].imm_f64 * b.imm_values[0].imm_f64, + a.imm_values[3].imm_f64 * b.imm_values[0].imm_f64); } -ImmValue ImmValue::fract() const noexcept { - switch (type) { - case Type::F32: - return ImmValue(imm_values[0].imm_f32 - std::floor(imm_values[0].imm_f32)); - case Type::F64: - return ImmValue(imm_values[0].imm_f64 - std::floor(imm_values[0].imm_f64)); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Div(const ImmValue& a, const ImmValue& b) 
{ + return ImmValue(a.imm_values[0].imm_u8 / b.imm_values[0].imm_u8, + a.imm_values[1].imm_u8 / b.imm_values[0].imm_u8, + a.imm_values[2].imm_u8 / b.imm_values[0].imm_u8, + a.imm_values[3].imm_u8 / b.imm_values[0].imm_u8); } -bool ImmValue::isnan() const noexcept { - switch (type) { - case Type::F32: - return std::isnan(imm_values[0].imm_f32); - case Type::F64: - return std::isnan(imm_values[0].imm_f64); - case Type::F32x2: - return std::isnan(imm_values[0].imm_f32) || std::isnan(imm_values[1].imm_f32); - case Type::F64x2: - return std::isnan(imm_values[0].imm_f64) || std::isnan(imm_values[1].imm_f64); - case Type::F32x3: - return std::isnan(imm_values[0].imm_f32) || std::isnan(imm_values[1].imm_f32) || - std::isnan(imm_values[2].imm_f32); - case Type::F64x3: - return std::isnan(imm_values[0].imm_f64) || std::isnan(imm_values[1].imm_f64) || - std::isnan(imm_values[2].imm_f64); - case Type::F32x4: - return std::isnan(imm_values[0].imm_f32) || std::isnan(imm_values[1].imm_f32) || - std::isnan(imm_values[2].imm_f32) || std::isnan(imm_values[3].imm_f32); - case Type::F64x4: - return std::isnan(imm_values[0].imm_f64) || std::isnan(imm_values[1].imm_f64) || - std::isnan(imm_values[2].imm_f64) || std::isnan(imm_values[3].imm_f64); - default: - UNREACHABLE_MSG("Invalid type {}", type); - } +template <> +ImmValue ImmValue::Div(const ImmValue& a, const ImmValue& b) { + return ImmValue(a.imm_values[0].imm_s8 / b.imm_values[0].imm_s8, + a.imm_values[1].imm_s8 / b.imm_values[0].imm_s8, + a.imm_values[2].imm_s8 / b.imm_values[0].imm_s8, + a.imm_values[3].imm_s8 / b.imm_values[0].imm_s8); } -ImmValue ImmValue::fma(const ImmF32F64& a, const ImmF32F64& b, const ImmF32F64& c) noexcept { - ASSERT(a.type == b.type && b.type == c.type); - switch (a.type) { - case Type::F32: - return ImmValue( - std::fma(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32, c.imm_values[0].imm_f32)); - case Type::F64: - return ImmValue( - std::fma(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64, c.imm_values[0].imm_f64)); - case Type::F32x2: - return ImmValue( - std::fma(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32, c.imm_values[0].imm_f32), - std::fma(a.imm_values[1].imm_f32, b.imm_values[1].imm_f32, c.imm_values[1].imm_f32)); - case Type::F64x2: - return ImmValue( - std::fma(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64, c.imm_values[0].imm_f64), - std::fma(a.imm_values[1].imm_f64, b.imm_values[1].imm_f64, c.imm_values[1].imm_f64)); - case Type::F32x3: - return ImmValue( - std::fma(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32, c.imm_values[0].imm_f32), - std::fma(a.imm_values[1].imm_f32, b.imm_values[1].imm_f32, c.imm_values[1].imm_f32), - std::fma(a.imm_values[2].imm_f32, b.imm_values[2].imm_f32, c.imm_values[2].imm_f32)); - case Type::F64x3: - return ImmValue( - std::fma(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64, c.imm_values[0].imm_f64), - std::fma(a.imm_values[1].imm_f64, b.imm_values[1].imm_f64, c.imm_values[1].imm_f64), - std::fma(a.imm_values[2].imm_f64, b.imm_values[2].imm_f64, c.imm_values[2].imm_f64)); - case Type::F32x4: - return ImmValue( - std::fma(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32, c.imm_values[0].imm_f32), - std::fma(a.imm_values[1].imm_f32, b.imm_values[1].imm_f32, c.imm_values[1].imm_f32), - std::fma(a.imm_values[2].imm_f32, b.imm_values[2].imm_f32, c.imm_values[2].imm_f32), - std::fma(a.imm_values[3].imm_f32, b.imm_values[3].imm_f32, c.imm_values[3].imm_f32)); - case Type::F64x4: - return ImmValue( - std::fma(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64, 
c.imm_values[0].imm_f64),
-            std::fma(a.imm_values[1].imm_f64, b.imm_values[1].imm_f64, c.imm_values[1].imm_f64),
-            std::fma(a.imm_values[2].imm_f64, b.imm_values[2].imm_f64, c.imm_values[2].imm_f64),
-            std::fma(a.imm_values[3].imm_f64, b.imm_values[3].imm_f64, c.imm_values[3].imm_f64));
-    default:
-        UNREACHABLE_MSG("Invalid type {}", a.type);
-    }
+template <>
+ImmValue ImmValue::Div<u16>(const ImmValue& a, const ImmValue& b) {
+    return ImmValue(a.imm_values[0].imm_u16 / b.imm_values[0].imm_u16,
+                    a.imm_values[1].imm_u16 / b.imm_values[0].imm_u16,
+                    a.imm_values[2].imm_u16 / b.imm_values[0].imm_u16,
+                    a.imm_values[3].imm_u16 / b.imm_values[0].imm_u16);
+}
+
+template <>
+ImmValue ImmValue::Div<s16>(const ImmValue& a, const ImmValue& b) {
+    return ImmValue(a.imm_values[0].imm_s16 / b.imm_values[0].imm_s16,
+                    a.imm_values[1].imm_s16 / b.imm_values[0].imm_s16,
+                    a.imm_values[2].imm_s16 / b.imm_values[0].imm_s16,
+                    a.imm_values[3].imm_s16 / b.imm_values[0].imm_s16);
+}
+
+template <>
+ImmValue ImmValue::Div<u32>(const ImmValue& a, const ImmValue& b) {
+    return ImmValue(a.imm_values[0].imm_u32 / b.imm_values[0].imm_u32,
+                    a.imm_values[1].imm_u32 / b.imm_values[0].imm_u32,
+                    a.imm_values[2].imm_u32 / b.imm_values[0].imm_u32,
+                    a.imm_values[3].imm_u32 / b.imm_values[0].imm_u32);
+}
+
+template <>
+ImmValue ImmValue::Div<s32>(const ImmValue& a, const ImmValue& b) {
+    return ImmValue(a.imm_values[0].imm_s32 / b.imm_values[0].imm_s32,
+                    a.imm_values[1].imm_s32 / b.imm_values[0].imm_s32,
+                    a.imm_values[2].imm_s32 / b.imm_values[0].imm_s32,
+                    a.imm_values[3].imm_s32 / b.imm_values[0].imm_s32);
+}
+
+template <>
+ImmValue ImmValue::Div<f32>(const ImmValue& a, const ImmValue& b) {
+    return ImmValue(a.imm_values[0].imm_f32 / b.imm_values[0].imm_f32,
+                    a.imm_values[1].imm_f32 / b.imm_values[0].imm_f32,
+                    a.imm_values[2].imm_f32 / b.imm_values[0].imm_f32,
+                    a.imm_values[3].imm_f32 / b.imm_values[0].imm_f32);
+}
+
+template <>
+ImmValue ImmValue::Div<u64>(const ImmValue& a, const ImmValue& b) {
+    return ImmValue(a.imm_values[0].imm_u64 / b.imm_values[0].imm_u64,
+                    a.imm_values[1].imm_u64 / b.imm_values[0].imm_u64,
+                    a.imm_values[2].imm_u64 / b.imm_values[0].imm_u64,
+                    a.imm_values[3].imm_u64 / b.imm_values[0].imm_u64);
+}
+
+template <>
+ImmValue ImmValue::Div<s64>(const ImmValue& a, const ImmValue& b) {
+    return ImmValue(a.imm_values[0].imm_s64 / b.imm_values[0].imm_s64,
+                    a.imm_values[1].imm_s64 / b.imm_values[0].imm_s64,
+                    a.imm_values[2].imm_s64 / b.imm_values[0].imm_s64,
+                    a.imm_values[3].imm_s64 / b.imm_values[0].imm_s64);
+}
+
+template <>
+ImmValue ImmValue::Div<f64>(const ImmValue& a, const ImmValue& b) {
+    return ImmValue(a.imm_values[0].imm_f64 / b.imm_values[0].imm_f64,
+                    a.imm_values[1].imm_f64 / b.imm_values[0].imm_f64,
+                    a.imm_values[2].imm_f64 / b.imm_values[0].imm_f64,
+                    a.imm_values[3].imm_f64 / b.imm_values[0].imm_f64);
+}
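+
+// Div folds all four lanes against the scalar divisor held in lane 0 of b,
+// mirroring the vector / scalar semantics of the operator/ it replaces, and is
+// the one family left non-noexcept because integer division by zero is
+// undefined. The integer folds that follow (Mod and the bitwise ops) act on
+// lane 0 only.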
+
+template <>
+ImmValue ImmValue::Mod<u8>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u8 % b.imm_values[0].imm_u8);
+}
+
+template <>
+ImmValue ImmValue::Mod<s8>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_s8 % b.imm_values[0].imm_s8);
+}
+
+template <>
+ImmValue ImmValue::Mod<u16>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u16 % b.imm_values[0].imm_u16);
+}
+
+template <>
+ImmValue ImmValue::Mod<s16>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_s16 % b.imm_values[0].imm_s16);
+}
+
+template <>
+ImmValue ImmValue::Mod<u32>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u32 % b.imm_values[0].imm_u32);
+}
+
+template <>
+ImmValue ImmValue::Mod<s32>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_s32 % b.imm_values[0].imm_s32);
+}
+
+template <>
+ImmValue ImmValue::Mod<u64>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u64 % b.imm_values[0].imm_u64);
+}
+
+template <>
+ImmValue ImmValue::Mod<s64>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_s64 % b.imm_values[0].imm_s64);
+}
+
+template <>
+ImmValue ImmValue::And<bool>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u1 & b.imm_values[0].imm_u1);
+}
+
+template <>
+ImmValue ImmValue::And<u8>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u8 & b.imm_values[0].imm_u8);
+}
+
+template <>
+ImmValue ImmValue::And<u16>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u16 & b.imm_values[0].imm_u16);
+}
+
+template <>
+ImmValue ImmValue::And<u32>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u32 & b.imm_values[0].imm_u32);
+}
+
+template <>
+ImmValue ImmValue::And<u64>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u64 & b.imm_values[0].imm_u64);
+}
+
+template <>
+ImmValue ImmValue::Or<bool>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u1 | b.imm_values[0].imm_u1);
+}
+
+template <>
+ImmValue ImmValue::Or<u8>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u8 | b.imm_values[0].imm_u8);
+}
+
+template <>
+ImmValue ImmValue::Or<u16>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u16 | b.imm_values[0].imm_u16);
+}
+
+template <>
+ImmValue ImmValue::Or<u32>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u32 | b.imm_values[0].imm_u32);
+}
+
+template <>
+ImmValue ImmValue::Or<u64>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u64 | b.imm_values[0].imm_u64);
+}
+
+template <>
+ImmValue ImmValue::Xor<bool>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u1 ^ b.imm_values[0].imm_u1);
+}
+
+template <>
+ImmValue ImmValue::Xor<u8>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u8 ^ b.imm_values[0].imm_u8);
+}
+
+template <>
+ImmValue ImmValue::Xor<u16>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u16 ^ b.imm_values[0].imm_u16);
+}
+
+template <>
+ImmValue ImmValue::Xor<u32>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u32 ^ b.imm_values[0].imm_u32);
+}
+
+template <>
+ImmValue ImmValue::Xor<u64>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u64 ^ b.imm_values[0].imm_u64);
+}
+
+template <>
+ImmValue ImmValue::LShift<u8>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u8 << b.imm_values[0].imm_u8);
+}
+
+template <>
+ImmValue ImmValue::LShift<u16>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u16 << b.imm_values[0].imm_u16);
+}
+
+template <>
+ImmValue ImmValue::LShift<u32>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u32 << b.imm_values[0].imm_u32);
+}
+
+template <>
+ImmValue ImmValue::LShift<u64>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u64 << b.imm_values[0].imm_u64);
+}
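+
+// The s* specializations of RShift below read the signed union members, giving
+// arithmetic (sign-extending) shifts, while the u* specializations stay logical.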
+
+template <>
+ImmValue ImmValue::RShift<u8>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u8 >> b.imm_values[0].imm_u8);
+}
+
+template <>
+ImmValue ImmValue::RShift<s8>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_s8 >> b.imm_values[0].imm_s8);
+}
+
+template <>
+ImmValue ImmValue::RShift<u16>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u16 >> b.imm_values[0].imm_u16);
+}
+
+template <>
+ImmValue ImmValue::RShift<s16>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_s16 >> b.imm_values[0].imm_s16);
+}
+
+template <>
+ImmValue ImmValue::RShift<u32>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u32 >> b.imm_values[0].imm_u32);
+}
+
+template <>
+ImmValue ImmValue::RShift<s32>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_s32 >> b.imm_values[0].imm_s32);
+}
+
+template <>
+ImmValue ImmValue::RShift<u64>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_u64 >> b.imm_values[0].imm_u64);
+}
+
+template <>
+ImmValue ImmValue::RShift<s64>(const ImmValue& a, const ImmValue& b) noexcept {
+    return ImmValue(a.imm_values[0].imm_s64 >> b.imm_values[0].imm_s64);
+}
+
+template <>
+ImmValue ImmValue::Not<bool>(const ImmValue& in) noexcept {
+    return ImmValue(!in.imm_values[0].imm_u1);
+}
+
+template <>
+ImmValue ImmValue::Not<u8>(const ImmValue& in) noexcept {
+    return ImmValue(~in.imm_values[0].imm_u8);
+}
+
+template <>
+ImmValue ImmValue::Not<u16>(const ImmValue& in) noexcept {
+    return ImmValue(~in.imm_values[0].imm_u16);
+}
+
+template <>
+ImmValue ImmValue::Not<u32>(const ImmValue& in) noexcept {
+    return ImmValue(~in.imm_values[0].imm_u32);
+}
+
+template <>
+ImmValue ImmValue::Not<u64>(const ImmValue& in) noexcept {
+    return ImmValue(~in.imm_values[0].imm_u64);
+}
+
+template <>
+ImmValue ImmValue::Neg<s8>(const ImmValue& in) noexcept {
+    return ImmValue(-in.imm_values[0].imm_s8, -in.imm_values[1].imm_s8, -in.imm_values[2].imm_s8,
+                    -in.imm_values[3].imm_s8);
+}
+
+template <>
+ImmValue ImmValue::Neg<s16>(const ImmValue& in) noexcept {
+    return ImmValue(-in.imm_values[0].imm_s16, -in.imm_values[1].imm_s16, -in.imm_values[2].imm_s16,
+                    -in.imm_values[3].imm_s16);
+}
+
+template <>
+ImmValue ImmValue::Neg<s32>(const ImmValue& in) noexcept {
+    return ImmValue(-in.imm_values[0].imm_s32, -in.imm_values[1].imm_s32, -in.imm_values[2].imm_s32,
+                    -in.imm_values[3].imm_s32);
+}
+
+template <>
+ImmValue ImmValue::Neg<f32>(const ImmValue& in) noexcept {
+    return ImmValue(-in.imm_values[0].imm_f32, -in.imm_values[1].imm_f32, -in.imm_values[2].imm_f32,
+                    -in.imm_values[3].imm_f32);
+}
+
+template <>
+ImmValue ImmValue::Neg<s64>(const ImmValue& in) noexcept {
+    return ImmValue(-in.imm_values[0].imm_s64, -in.imm_values[1].imm_s64, -in.imm_values[2].imm_s64,
+                    -in.imm_values[3].imm_s64);
+}
+
+template <>
+ImmValue ImmValue::Neg<f64>(const ImmValue& in) noexcept {
+    return ImmValue(-in.imm_values[0].imm_f64, -in.imm_values[1].imm_f64, -in.imm_values[2].imm_f64,
+                    -in.imm_values[3].imm_f64);
+}
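+
+// Neg negates all four lanes; Abs and the scalar math helpers that follow
+// (Recip through Log2) fold lane 0 only.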
ImmValue(std::abs(in.imm_values[0].imm_s32)); +} + +template <> +ImmValue ImmValue::Abs(const ImmValue& in) noexcept { + return ImmValue(std::abs(in.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Abs(const ImmValue& in) noexcept { + return ImmValue(std::abs(in.imm_values[0].imm_s64)); +} + +template <> +ImmValue ImmValue::Abs(const ImmValue& in) noexcept { + return ImmValue(std::abs(in.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Recip(const ImmValue& in) noexcept { + return ImmValue(1.0f / in.imm_values[0].imm_f32); +} + +template <> +ImmValue ImmValue::Recip(const ImmValue& in) noexcept { + return ImmValue(1.0 / in.imm_values[0].imm_f64); +} + +template <> +ImmValue ImmValue::Sqrt(const ImmValue& in) noexcept { + return ImmValue(std::sqrt(in.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Sqrt(const ImmValue& in) noexcept { + return ImmValue(std::sqrt(in.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Rsqrt(const ImmValue& in) noexcept { + return ImmValue(1.0f / std::sqrt(in.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Rsqrt(const ImmValue& in) noexcept { + return ImmValue(1.0 / std::sqrt(in.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Sin(const ImmValue& in) noexcept { + return ImmValue(std::sin(in.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Sin(const ImmValue& in) noexcept { + return ImmValue(std::sin(in.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Cos(const ImmValue& in) noexcept { + return ImmValue(std::cos(in.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Cos(const ImmValue& in) noexcept { + return ImmValue(std::cos(in.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Exp2(const ImmValue& in) noexcept { + return ImmValue(std::exp2(in.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Exp2(const ImmValue& in) noexcept { + return ImmValue(std::exp2(in.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Ldexp(const ImmValue& in, const ImmValue& exp) noexcept { + return ImmValue(std::ldexp(in.imm_values[0].imm_f32, exp.imm_values[0].imm_s32)); +} + +template <> +ImmValue ImmValue::Ldexp(const ImmValue& in, const ImmValue& exp) noexcept { + return ImmValue(std::ldexp(in.imm_values[0].imm_f64, exp.imm_values[0].imm_s32)); +} + +template <> +ImmValue ImmValue::Log2(const ImmValue& in) noexcept { + return ImmValue(std::log2(in.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Log2(const ImmValue& in) noexcept { + return ImmValue(std::log2(in.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Min(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::min(a.imm_values[0].imm_u8, b.imm_values[0].imm_u8)); +} + +template <> +ImmValue ImmValue::Min(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::min(a.imm_values[0].imm_s8, b.imm_values[0].imm_s8)); +} + +template <> +ImmValue ImmValue::Min(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::min(a.imm_values[0].imm_u16, b.imm_values[0].imm_u16)); +} + +template <> +ImmValue ImmValue::Min(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::min(a.imm_values[0].imm_s16, b.imm_values[0].imm_s16)); +} + +template <> +ImmValue ImmValue::Min(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::min(a.imm_values[0].imm_u32, b.imm_values[0].imm_u32)); +} + +template <> +ImmValue ImmValue::Min(const ImmValue& a, const ImmValue& b) noexcept { + 
return ImmValue(std::min(a.imm_values[0].imm_s32, b.imm_values[0].imm_s32)); +} + +template <> +ImmValue ImmValue::Min(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::min(a.imm_values[0].imm_u64, b.imm_values[0].imm_u64)); +} + +template <> +ImmValue ImmValue::Min(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::min(a.imm_values[0].imm_s64, b.imm_values[0].imm_s64)); +} + +template <> +ImmValue ImmValue::Min(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::min(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Min(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::min(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Max(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::max(a.imm_values[0].imm_u8, b.imm_values[0].imm_u8)); +} + +template <> +ImmValue ImmValue::Max(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::max(a.imm_values[0].imm_s8, b.imm_values[0].imm_s8)); +} + +template <> +ImmValue ImmValue::Max(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::max(a.imm_values[0].imm_u16, b.imm_values[0].imm_u16)); +} + +template <> +ImmValue ImmValue::Max(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::max(a.imm_values[0].imm_s16, b.imm_values[0].imm_s16)); +} + +template <> +ImmValue ImmValue::Max(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::max(a.imm_values[0].imm_u32, b.imm_values[0].imm_u32)); +} + +template <> +ImmValue ImmValue::Max(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::max(a.imm_values[0].imm_s32, b.imm_values[0].imm_s32)); +} + +template <> +ImmValue ImmValue::Max(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::max(a.imm_values[0].imm_u64, b.imm_values[0].imm_u64)); +} + +template <> +ImmValue ImmValue::Max(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::max(a.imm_values[0].imm_s64, b.imm_values[0].imm_s64)); +} + +template <> +ImmValue ImmValue::Max(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::max(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Max(const ImmValue& a, const ImmValue& b) noexcept { + return ImmValue(std::max(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Clamp(const ImmValue& in, const ImmValue& min, + const ImmValue& max) noexcept { + return ImmValue( + std::clamp(in.imm_values[0].imm_u8, min.imm_values[0].imm_u8, max.imm_values[0].imm_u8)); +} + +template <> +ImmValue ImmValue::Clamp(const ImmValue& in, const ImmValue& min, + const ImmValue& max) noexcept { + return ImmValue( + std::clamp(in.imm_values[0].imm_s8, min.imm_values[0].imm_s8, max.imm_values[0].imm_s8)); +} + +template <> +ImmValue ImmValue::Clamp(const ImmValue& in, const ImmValue& min, + const ImmValue& max) noexcept { + return ImmValue( + std::clamp(in.imm_values[0].imm_u16, min.imm_values[0].imm_u16, max.imm_values[0].imm_u16)); +} + +template <> +ImmValue ImmValue::Clamp(const ImmValue& in, const ImmValue& min, + const ImmValue& max) noexcept { + return ImmValue( + std::clamp(in.imm_values[0].imm_s16, min.imm_values[0].imm_s16, max.imm_values[0].imm_s16)); +} + +template <> +ImmValue ImmValue::Clamp(const ImmValue& in, const ImmValue& min, + const ImmValue& max) noexcept { + return ImmValue( + 
std::clamp(in.imm_values[0].imm_u32, min.imm_values[0].imm_u32, max.imm_values[0].imm_u32)); +} + +template <> +ImmValue ImmValue::Clamp(const ImmValue& in, const ImmValue& min, + const ImmValue& max) noexcept { + return ImmValue( + std::clamp(in.imm_values[0].imm_s32, min.imm_values[0].imm_s32, max.imm_values[0].imm_s32)); +} + +template <> +ImmValue ImmValue::Clamp(const ImmValue& in, const ImmValue& min, + const ImmValue& max) noexcept { + return ImmValue( + std::clamp(in.imm_values[0].imm_u64, min.imm_values[0].imm_u64, max.imm_values[0].imm_u64)); +} + +template <> +ImmValue ImmValue::Clamp(const ImmValue& in, const ImmValue& min, + const ImmValue& max) noexcept { + return ImmValue( + std::clamp(in.imm_values[0].imm_s64, min.imm_values[0].imm_s64, max.imm_values[0].imm_s64)); +} + +template <> +ImmValue ImmValue::Clamp(const ImmValue& in, const ImmValue& min, + const ImmValue& max) noexcept { + return ImmValue( + std::clamp(in.imm_values[0].imm_f32, min.imm_values[0].imm_f32, max.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Clamp(const ImmValue& in, const ImmValue& min, + const ImmValue& max) noexcept { + return ImmValue( + std::clamp(in.imm_values[0].imm_f64, min.imm_values[0].imm_f64, max.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Floor(const ImmValue& in) noexcept { + return ImmValue(std::floor(in.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Floor(const ImmValue& in) noexcept { + return ImmValue(std::floor(in.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Ceil(const ImmValue& in) noexcept { + return ImmValue(std::ceil(in.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Ceil(const ImmValue& in) noexcept { + return ImmValue(std::ceil(in.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Round(const ImmValue& in) noexcept { + return ImmValue(std::round(in.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Round(const ImmValue& in) noexcept { + return ImmValue(std::round(in.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Trunc(const ImmValue& in) noexcept { + return ImmValue(std::trunc(in.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Trunc(const ImmValue& in) noexcept { + return ImmValue(std::trunc(in.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Fract(const ImmValue& in) noexcept { + return ImmValue(in.imm_values[0].imm_f32 - std::floor(in.imm_values[0].imm_f32)); +} + +template <> +ImmValue ImmValue::Fract(const ImmValue& in) noexcept { + return ImmValue(in.imm_values[0].imm_f64 - std::floor(in.imm_values[0].imm_f64)); +} + +template <> +ImmValue ImmValue::Fma(const ImmValue& a, const ImmValue& b, + const ImmValue& c) noexcept { + return ImmValue( + std::fma(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32, c.imm_values[0].imm_f32), + std::fma(a.imm_values[1].imm_f32, b.imm_values[1].imm_f32, c.imm_values[1].imm_f32), + std::fma(a.imm_values[2].imm_f32, b.imm_values[2].imm_f32, c.imm_values[2].imm_f32), + std::fma(a.imm_values[3].imm_f32, b.imm_values[3].imm_f32, c.imm_values[3].imm_f32)); +} + +template <> +ImmValue ImmValue::Fma(const ImmValue& a, const ImmValue& b, + const ImmValue& c) noexcept { + return ImmValue( + std::fma(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64, c.imm_values[0].imm_f64), + std::fma(a.imm_values[1].imm_f64, b.imm_values[1].imm_f64, c.imm_values[1].imm_f64), + std::fma(a.imm_values[2].imm_f64, b.imm_values[2].imm_f64, c.imm_values[2].imm_f64), + std::fma(a.imm_values[3].imm_f64, 
b.imm_values[3].imm_f64, c.imm_values[3].imm_f64)); +} + +template <> +bool ImmValue::IsNan(const ImmValue& in) noexcept { + return std::isnan(in.imm_values[0].imm_f32) || std::isnan(in.imm_values[1].imm_f32) || + std::isnan(in.imm_values[2].imm_f32) || std::isnan(in.imm_values[3].imm_f32); +} + +template <> +bool ImmValue::IsNan(const ImmValue& in) noexcept { + return std::isnan(in.imm_values[0].imm_f64) || std::isnan(in.imm_values[1].imm_f64) || + std::isnan(in.imm_values[2].imm_f64) || std::isnan(in.imm_values[3].imm_f64); } bool ImmValue::IsSupportedValue(const IR::Value& value) noexcept { @@ -1465,58 +1237,18 @@ bool ImmValue::IsSupportedValue(const IR::Value& value) noexcept { } } -} // namespace Shader::IR +} // namespace Shader::IR::ComputeValue namespace std { -std::size_t hash::operator()(const Shader::IR::ImmValue& value) const { - using namespace Shader::IR; +std::size_t hash::operator()( + const Shader::IR::ComputeValue::ImmValue& value) const { + using namespace Shader::IR::ComputeValue; - u64 h = HashCombine(static_cast(value.Type()), 0ULL); - - switch (value.Type()) { - case Type::U1: - return HashCombine(static_cast(value.imm_values[0].imm_u1), h); - case Type::U8: - return HashCombine(static_cast(value.imm_values[0].imm_u8), h); - case Type::U16: - return HashCombine(static_cast(value.imm_values[0].imm_u16), h); - case Type::U32: - case Type::F32: - return HashCombine(static_cast(value.imm_values[0].imm_u32), h); - case Type::U64: - case Type::F64: - return HashCombine(static_cast(value.imm_values[0].imm_u64), h); - case Type::U32x2: - case Type::F32x2: - h = HashCombine(static_cast(value.imm_values[0].imm_u32), h); - return HashCombine(static_cast(value.imm_values[1].imm_u32), h); - case Type::F64x2: - h = HashCombine(static_cast(value.imm_values[0].imm_f64), h); - return HashCombine(static_cast(value.imm_values[1].imm_f64), h); - case Type::U32x3: - case Type::F32x3: - h = HashCombine(static_cast(value.imm_values[0].imm_u32), h); - h = HashCombine(static_cast(value.imm_values[1].imm_u32), h); - return HashCombine(static_cast(value.imm_values[2].imm_u32), h); - case Type::F64x3: - h = HashCombine(static_cast(value.imm_values[0].imm_f64), h); - h = HashCombine(static_cast(value.imm_values[1].imm_f64), h); - return HashCombine(static_cast(value.imm_values[2].imm_f64), h); - case Type::U32x4: - case Type::F32x4: - h = HashCombine(static_cast(value.imm_values[0].imm_u32), h); - h = HashCombine(static_cast(value.imm_values[1].imm_u32), h); - h = HashCombine(static_cast(value.imm_values[2].imm_u32), h); - return HashCombine(static_cast(value.imm_values[3].imm_u32), h); - case Type::F64x4: - h = HashCombine(static_cast(value.imm_values[0].imm_f64), h); - h = HashCombine(static_cast(value.imm_values[1].imm_f64), h); - h = HashCombine(static_cast(value.imm_values[2].imm_f64), h); - return HashCombine(static_cast(value.imm_values[3].imm_f64), h); - default: - UNREACHABLE_MSG("Invalid type {}", value.Type()); - } + u64 h = HashCombine(value.imm_values[0].imm_u64, 0UL); + h = HashCombine(value.imm_values[1].imm_u64, h); + h = HashCombine(value.imm_values[2].imm_u64, h); + return HashCombine(value.imm_values[3].imm_u64, h); } -} // namespace std +} // namespace std \ No newline at end of file diff --git a/src/shader_recompiler/ir/compute_value/imm_value.h b/src/shader_recompiler/ir/compute_value/imm_value.h index 74b9d39b7..a46712ce5 100644 --- a/src/shader_recompiler/ir/compute_value/imm_value.h +++ b/src/shader_recompiler/ir/compute_value/imm_value.h @@ -11,62 +11,9 @@ #include 
"shader_recompiler/ir/type.h" #include "shader_recompiler/ir/value.h" -namespace Shader::IR { +namespace Shader::IR::ComputeValue { -// Like IR::Value but can only hold immediate values. Additionally, can hold vectors of values. -// Has arithmetic operations defined for it. Usefull for computing a value at shader compile time. - -template -class TypedImmValue; - -using ImmU1 = TypedImmValue; -using ImmU8 = TypedImmValue; -using ImmS8 = TypedImmValue; -using ImmU16 = TypedImmValue; -using ImmS16 = TypedImmValue; -using ImmU32 = TypedImmValue; -using ImmS32 = TypedImmValue; -using ImmF32 = TypedImmValue; -using ImmU64 = TypedImmValue; -using ImmS64 = TypedImmValue; -using ImmF64 = TypedImmValue; -using ImmS32F32 = TypedImmValue; -using ImmS64F64 = TypedImmValue; -using ImmU32U64 = TypedImmValue; -using ImmS32S64 = TypedImmValue; -using ImmU16U32U64 = TypedImmValue; -using ImmS16S32S64 = TypedImmValue; -using ImmF32F64 = TypedImmValue; -using ImmUAny = TypedImmValue; -using ImmSAny = TypedImmValue; -using ImmU32x2 = TypedImmValue; -using ImmU32x3 = TypedImmValue; -using ImmU32x4 = TypedImmValue; -using ImmS32x2 = TypedImmValue; -using ImmS32x3 = TypedImmValue; -using ImmS32x4 = TypedImmValue; -using ImmF32x2 = TypedImmValue; -using ImmF32x3 = TypedImmValue; -using ImmF32x4 = TypedImmValue; -using ImmF64x2 = TypedImmValue; -using ImmF64x3 = TypedImmValue; -using ImmF64x4 = TypedImmValue; -using ImmS32F32x2 = TypedImmValue; -using ImmS32F32x3 = TypedImmValue; -using ImmS32F32x4 = TypedImmValue; -using ImmF32F64x2 = TypedImmValue; -using ImmF32F64x3 = TypedImmValue; -using ImmF32F64x4 = TypedImmValue; -using ImmU32xAny = TypedImmValue; -using ImmS32xAny = TypedImmValue; -using ImmF32xAny = TypedImmValue; -using ImmF64xAny = TypedImmValue; -using ImmS32F32xAny = TypedImmValue; -using ImmF32F64xAny = TypedImmValue; +// Holds an immediate value and provides helper functions to do arithmetic operations on it. 
class ImmValue { public: @@ -93,6 +40,12 @@ public: ImmValue(f32 value1, f32 value2) noexcept; ImmValue(f32 value1, f32 value2, f32 value3) noexcept; ImmValue(f32 value1, f32 value2, f32 value3, f32 value4) noexcept; + ImmValue(u64 value1, u64 value2) noexcept; + ImmValue(u64 value1, u64 value2, u64 value3) noexcept; + ImmValue(u64 value1, u64 value2, u64 value3, u64 value4) noexcept; + ImmValue(s64 value1, s64 value2) noexcept; + ImmValue(s64 value1, s64 value2, s64 value3) noexcept; + ImmValue(s64 value1, s64 value2, s64 value3, s64 value4) noexcept; ImmValue(f64 value1, f64 value2) noexcept; ImmValue(f64 value1, f64 value2, f64 value3) noexcept; ImmValue(f64 value1, f64 value2, f64 value3, f64 value4) noexcept; @@ -101,107 +54,142 @@ public: ImmValue(const ImmValue& value1, const ImmValue& value2, const ImmValue& value3, const ImmValue& value4) noexcept; - [[nodiscard]] bool IsEmpty() const noexcept; - [[nodiscard]] IR::Type Type() const noexcept; - [[nodiscard]] IR::Type BaseType() const noexcept; - [[nodiscard]] u32 Dimensions() const noexcept; + [[nodiscard]] static ImmValue CompositeFrom2x2(const ImmValue& value1, + const ImmValue& value2) noexcept; - [[nodiscard]] bool IsSigned() const noexcept; - void SetSigned(bool signed_) noexcept; - void SameSignAs(const ImmValue& other) noexcept; + [[nodiscard]] bool U1() const noexcept; + [[nodiscard]] u8 U8() const noexcept; + [[nodiscard]] s8 S8() const noexcept; + [[nodiscard]] u16 U16() const noexcept; + [[nodiscard]] s16 S16() const noexcept; + [[nodiscard]] u32 U32() const noexcept; + [[nodiscard]] s32 S32() const noexcept; + [[nodiscard]] f32 F32() const noexcept; + [[nodiscard]] u64 U64() const noexcept; + [[nodiscard]] s64 S64() const noexcept; + [[nodiscard]] f64 F64() const noexcept; - [[nodiscard]] ImmValue Convert(IR::Type new_type, bool new_signed) const noexcept; - [[nodiscard]] ImmValue Bitcast(IR::Type new_type, bool new_signed) const noexcept; - [[nodiscard]] ImmValue Extract(const ImmU32& index) const noexcept; - [[nodiscard]] ImmValue Insert(const ImmValue& value, const ImmU32& indndex) const noexcept; - - [[nodiscard]] bool U1() const; - [[nodiscard]] u8 U8() const; - [[nodiscard]] s8 S8() const; - [[nodiscard]] u16 U16() const; - [[nodiscard]] s16 S16() const; - [[nodiscard]] u32 U32() const; - [[nodiscard]] s32 S32() const; - [[nodiscard]] f32 F32() const; - [[nodiscard]] u64 U64() const; - [[nodiscard]] s64 S64() const; - [[nodiscard]] f64 F64() const; - - [[nodiscard]] std::tuple U32x2() const; - [[nodiscard]] std::tuple U32x3() const; - [[nodiscard]] std::tuple U32x4() const; - [[nodiscard]] std::tuple S32x2() const; - [[nodiscard]] std::tuple S32x3() const; - [[nodiscard]] std::tuple S32x4() const; - [[nodiscard]] std::tuple F32x2() const; - [[nodiscard]] std::tuple F32x3() const; - [[nodiscard]] std::tuple F32x4() const; - [[nodiscard]] std::tuple F64x2() const; - [[nodiscard]] std::tuple F64x3() const; - [[nodiscard]] std::tuple F64x4() const; + [[nodiscard]] std::tuple U32x2() const noexcept; + [[nodiscard]] std::tuple U32x3() const noexcept; + [[nodiscard]] std::tuple U32x4() const noexcept; + [[nodiscard]] std::tuple S32x2() const noexcept; + [[nodiscard]] std::tuple S32x3() const noexcept; + [[nodiscard]] std::tuple S32x4() const noexcept; + [[nodiscard]] std::tuple F32x2() const noexcept; + [[nodiscard]] std::tuple F32x3() const noexcept; + [[nodiscard]] std::tuple F32x4() const noexcept; + [[nodiscard]] std::tuple F64x2() const noexcept; + [[nodiscard]] std::tuple F64x3() const noexcept; + [[nodiscard]] 
std::tuple F64x4() const noexcept; ImmValue& operator=(const ImmValue& value) noexcept = default; [[nodiscard]] bool operator==(const ImmValue& other) const noexcept; [[nodiscard]] bool operator!=(const ImmValue& other) const noexcept; - [[nodiscard]] bool operator<(const ImmValue& other) const noexcept; - [[nodiscard]] bool operator>(const ImmValue& other) const noexcept; - [[nodiscard]] bool operator<=(const ImmValue& other) const noexcept; - [[nodiscard]] bool operator>=(const ImmValue& other) const noexcept; - [[nodiscard]] ImmValue operator+(const ImmValue& other) const noexcept; - [[nodiscard]] ImmValue operator-(const ImmValue& other) const noexcept; - [[nodiscard]] ImmValue operator*(const ImmValue& other) const noexcept; - [[nodiscard]] ImmValue operator/(const ImmValue& other) const; - [[nodiscard]] ImmValue operator%(const ImmValue& other) const noexcept; - [[nodiscard]] ImmValue operator&(const ImmValue& other) const noexcept; - [[nodiscard]] ImmValue operator|(const ImmValue& other) const noexcept; - [[nodiscard]] ImmValue operator^(const ImmValue& other) const noexcept; - [[nodiscard]] ImmValue operator<<(const ImmU32& other) const noexcept; - [[nodiscard]] ImmValue operator>>(const ImmU32& other) const noexcept; - [[nodiscard]] ImmValue operator~() const noexcept; + [[nodiscard]] static ImmValue Extract(const ImmValue& vec, const ImmValue& index) noexcept; + [[nodiscard]] static ImmValue Insert(const ImmValue& vec, const ImmValue& value, + const ImmValue& index) noexcept; - [[nodiscard]] ImmValue operator++(int) noexcept; - [[nodiscard]] ImmValue operator--(int) noexcept; + template + [[nodiscard]] static ImmValue Convert(const ImmValue& in) noexcept; - ImmValue& operator++() noexcept; - ImmValue& operator--() noexcept; + template + [[nodiscard]] static ImmValue Add(const ImmValue& a, const ImmValue& b) noexcept; - [[nodiscard]] ImmValue operator-() const noexcept; - [[nodiscard]] ImmValue operator+() const noexcept; + template + [[nodiscard]] static ImmValue Sub(const ImmValue& a, const ImmValue& b) noexcept; - ImmValue& operator+=(const ImmValue& other) noexcept; - ImmValue& operator-=(const ImmValue& other) noexcept; - ImmValue& operator*=(const ImmValue& other) noexcept; - ImmValue& operator/=(const ImmValue& other); - ImmValue& operator%=(const ImmValue& other) noexcept; - ImmValue& operator&=(const ImmValue& other) noexcept; - ImmValue& operator|=(const ImmValue& other) noexcept; - ImmValue& operator^=(const ImmValue& other) noexcept; - ImmValue& operator<<=(const ImmU32& other) noexcept; - ImmValue& operator>>=(const ImmU32& other) noexcept; + template + [[nodiscard]] static ImmValue Mul(const ImmValue& a, const ImmValue& b) noexcept; - [[nodiscard]] ImmValue abs() const noexcept; - [[nodiscard]] ImmValue recip() const noexcept; - [[nodiscard]] ImmValue sqrt() const noexcept; - [[nodiscard]] ImmValue rsqrt() const noexcept; - [[nodiscard]] ImmValue sin() const noexcept; - [[nodiscard]] ImmValue cos() const noexcept; - [[nodiscard]] ImmValue exp2() const noexcept; - [[nodiscard]] ImmValue ldexp(const ImmU32& exp) const noexcept; - [[nodiscard]] ImmValue log2() const noexcept; - [[nodiscard]] ImmValue clamp(const ImmValue& min, const ImmValue& max) const noexcept; - [[nodiscard]] ImmValue floor() const noexcept; - [[nodiscard]] ImmValue ceil() const noexcept; - [[nodiscard]] ImmValue round() const noexcept; - [[nodiscard]] ImmValue trunc() const noexcept; - [[nodiscard]] ImmValue fract() const noexcept; - [[nodiscard]] bool isnan() const noexcept; + template + 
[[nodiscard]] static ImmValue Div(const ImmValue& a, const ImmValue& b); - [[nodiscard]] static ImmValue fma(const ImmF32F64& a, const ImmF32F64& b, - const ImmF32F64& c) noexcept; + template + [[nodiscard]] static ImmValue Mod(const ImmValue& a, const ImmValue& b) noexcept; - static bool IsSupportedValue(const IR::Value& value) noexcept; + template + [[nodiscard]] static ImmValue And(const ImmValue& a, const ImmValue& b) noexcept; + + template + [[nodiscard]] static ImmValue Or(const ImmValue& a, const ImmValue& b) noexcept; + + template + [[nodiscard]] static ImmValue Xor(const ImmValue& a, const ImmValue& b) noexcept; + + template + [[nodiscard]] static ImmValue LShift(const ImmValue& a, const ImmValue& shift) noexcept; + + template + [[nodiscard]] static ImmValue RShift(const ImmValue& a, const ImmValue& shift) noexcept; + + template + [[nodiscard]] static ImmValue Not(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Neg(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Abs(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Recip(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Sqrt(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Rsqrt(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Sin(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Cos(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Exp2(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Ldexp(const ImmValue& in, const ImmValue& exp) noexcept; + + template + [[nodiscard]] static ImmValue Log2(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Min(const ImmValue& a, const ImmValue& b) noexcept; + + template + [[nodiscard]] static ImmValue Max(const ImmValue& a, const ImmValue& b) noexcept; + + template + [[nodiscard]] static ImmValue Clamp(const ImmValue& in, const ImmValue& min, + const ImmValue& max) noexcept; + + template + [[nodiscard]] static ImmValue Floor(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Ceil(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Round(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Trunc(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Fract(const ImmValue& in) noexcept; + + template + [[nodiscard]] static ImmValue Fma(const ImmValue& a, const ImmValue& b, + const ImmValue& c) noexcept; + + template + [[nodiscard]] static bool IsNan(const ImmValue& in) noexcept; + + [[nodiscard]] static bool IsSupportedValue(const IR::Value& value) noexcept; private: union Value { @@ -218,168 +206,113 @@ private: f64 imm_f64; }; - IR::Type type{}; - bool is_signed{}; std::array imm_values; friend class std::hash; }; static_assert(std::is_trivially_copyable_v); -template -class TypedImmValue : public ImmValue { -public: - inline static constexpr IR::Type static_type = type_; - inline static constexpr bool static_is_signed = is_signed_; - - TypedImmValue() = default; - - template - requires((other_type & type_) != IR::Type::Void && other_signed == is_signed_) - explicit(false) TypedImmValue(const TypedImmValue& other) - : ImmValue(other) {} - - explicit TypedImmValue(const ImmValue& value) : ImmValue(value) { - if ((value.Type() & type_) == IR::Type::Void && value.IsSigned() == is_signed_) { - throw InvalidArgument("Incompatible types {} {} and {} {}", - 
is_signed_ ? "signed" : "unsigned", type_, value.Type(), - value.IsSigned() ? "signed" : "unsigned"); - } - } -}; - -inline bool ImmValue::IsEmpty() const noexcept { - return type == Type::Void; -} - -inline IR::Type ImmValue::Type() const noexcept { - return type; -} - -inline bool ImmValue::U1() const { - ASSERT(type == Type::U1 && !is_signed); +inline bool ImmValue::U1() const noexcept { return imm_values[0].imm_u1; } -inline u8 ImmValue::U8() const { - ASSERT(type == Type::U8 && !is_signed); +inline u8 ImmValue::U8() const noexcept { return imm_values[0].imm_u8; } -inline s8 ImmValue::S8() const { - ASSERT(type == Type::U8 && is_signed); +inline s8 ImmValue::S8() const noexcept { return imm_values[0].imm_s8; } -inline u16 ImmValue::U16() const { - ASSERT(type == Type::U16 && !is_signed); +inline u16 ImmValue::U16() const noexcept { return imm_values[0].imm_u16; } -inline s16 ImmValue::S16() const { - ASSERT(type == Type::U16 && is_signed); +inline s16 ImmValue::S16() const noexcept { return imm_values[0].imm_s16; } -inline u32 ImmValue::U32() const { - ASSERT(type == Type::U32 && !is_signed); +inline u32 ImmValue::U32() const noexcept { return imm_values[0].imm_u32; } -inline s32 ImmValue::S32() const { - ASSERT(type == Type::U32 && is_signed); +inline s32 ImmValue::S32() const noexcept { return imm_values[0].imm_s32; } -inline f32 ImmValue::F32() const { - ASSERT(type == Type::F32 && is_signed); +inline f32 ImmValue::F32() const noexcept { return imm_values[0].imm_f32; } -inline u64 ImmValue::U64() const { - ASSERT(type == Type::U64 && !is_signed); +inline u64 ImmValue::U64() const noexcept { return imm_values[0].imm_u64; } -inline s64 ImmValue::S64() const { - ASSERT(type == Type::U64 && is_signed); +inline s64 ImmValue::S64() const noexcept { return imm_values[0].imm_s64; } -inline f64 ImmValue::F64() const { - ASSERT(type == Type::F64 && is_signed); +inline f64 ImmValue::F64() const noexcept { return imm_values[0].imm_f64; } -inline std::tuple ImmValue::U32x2() const { - ASSERT(type == Type::U32x2 && !is_signed); +inline std::tuple ImmValue::U32x2() const noexcept { return {imm_values[0].imm_u32, imm_values[1].imm_u32}; } -inline std::tuple ImmValue::U32x3() const { - ASSERT(type == Type::U32x3 && !is_signed); +inline std::tuple ImmValue::U32x3() const noexcept { return {imm_values[0].imm_u32, imm_values[1].imm_u32, imm_values[2].imm_u32}; } -inline std::tuple ImmValue::U32x4() const { - ASSERT(type == Type::U32x4 && !is_signed); +inline std::tuple ImmValue::U32x4() const noexcept { return {imm_values[0].imm_u32, imm_values[1].imm_u32, imm_values[2].imm_u32, imm_values[3].imm_u32}; } -inline std::tuple ImmValue::S32x2() const { - ASSERT(type == Type::U32x2 && is_signed); +inline std::tuple ImmValue::S32x2() const noexcept { return {imm_values[0].imm_s32, imm_values[1].imm_s32}; } -inline std::tuple ImmValue::S32x3() const { - ASSERT(type == Type::U32x3 && is_signed); +inline std::tuple ImmValue::S32x3() const noexcept { return {imm_values[0].imm_s32, imm_values[1].imm_s32, imm_values[2].imm_s32}; } -inline std::tuple ImmValue::S32x4() const { - ASSERT(type == Type::U32x4 && is_signed); +inline std::tuple ImmValue::S32x4() const noexcept { return {imm_values[0].imm_s32, imm_values[1].imm_s32, imm_values[2].imm_s32, imm_values[3].imm_s32}; } -inline std::tuple ImmValue::F32x2() const { - ASSERT(type == Type::F32x2 && is_signed); +inline std::tuple ImmValue::F32x2() const noexcept { return {imm_values[0].imm_f32, imm_values[1].imm_f32}; } -inline std::tuple ImmValue::F32x3() const { - 
ASSERT(type == Type::F32x3 && is_signed); +inline std::tuple ImmValue::F32x3() const noexcept { return {imm_values[0].imm_f32, imm_values[1].imm_f32, imm_values[2].imm_f32}; } -inline std::tuple ImmValue::F32x4() const { - ASSERT(type == Type::F32x4 && is_signed); +inline std::tuple ImmValue::F32x4() const noexcept { return {imm_values[0].imm_f32, imm_values[1].imm_f32, imm_values[2].imm_f32, imm_values[3].imm_f32}; } -inline std::tuple ImmValue::F64x2() const { - ASSERT(type == Type::F64x2 && is_signed); +inline std::tuple ImmValue::F64x2() const noexcept { return {imm_values[0].imm_f64, imm_values[1].imm_f64}; } -inline std::tuple ImmValue::F64x3() const { - ASSERT(type == Type::F64x3 && is_signed); +inline std::tuple ImmValue::F64x3() const noexcept { return {imm_values[0].imm_f64, imm_values[1].imm_f64, imm_values[2].imm_f64}; } -inline std::tuple ImmValue::F64x4() const { - ASSERT(type == Type::F64x4 && is_signed); +inline std::tuple ImmValue::F64x4() const noexcept { return {imm_values[0].imm_f64, imm_values[1].imm_f64, imm_values[2].imm_f64, imm_values[3].imm_f64}; } -} // namespace Shader::IR +} // namespace Shader::IR::ComputeValue namespace std { template <> -struct hash { - std::size_t operator()(const Shader::IR::ImmValue& value) const; +struct hash { + std::size_t operator()(const Shader::IR::ComputeValue::ImmValue& value) const; }; } // namespace std \ No newline at end of file From 418f7daa57712a774fe2a01078920fd4a6179387 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 19 Mar 2025 23:59:34 +0100 Subject: [PATCH 23/49] clang-format --- src/common/cartesian_invoke.h | 6 +- src/common/func_traits.h | 2 +- .../ir/compute_value/do_composite.cpp | 268 ++++++++++++------ .../ir/compute_value/do_composite.h | 170 +++++++---- .../ir/compute_value/do_nop_functions.h | 3 +- 5 files changed, 310 insertions(+), 139 deletions(-) diff --git a/src/common/cartesian_invoke.h b/src/common/cartesian_invoke.h index 6bbc4cd4a..7a4162592 100644 --- a/src/common/cartesian_invoke.h +++ b/src/common/cartesian_invoke.h @@ -10,8 +10,8 @@ namespace Detail { template void CartesianInvokeImpl(Func func, OutputIt out_it, - std::tuple& arglists_its, - const std::tuple& arglists_tuple) { + std::tuple& arglists_its, + const std::tuple& arglists_tuple) { if constexpr (Level == N) { auto get_tuple = [&](std::index_sequence) { return std::forward_as_tuple(*std::get(arglists_its)...); @@ -37,7 +37,7 @@ void CartesianInvoke(Func func, OutputIt out_it, const ArgLists&... 
arg_lists) { std::tuple arglists_it; Detail::CartesianInvokeImpl(func, out_it, arglists_it, - arglists_tuple); + arglists_tuple); } } // namespace Common diff --git a/src/common/func_traits.h b/src/common/func_traits.h index c3035d7cc..b85681467 100644 --- a/src/common/func_traits.h +++ b/src/common/func_traits.h @@ -3,8 +3,8 @@ #pragma once -#include #include +#include namespace Common { diff --git a/src/shader_recompiler/ir/compute_value/do_composite.cpp b/src/shader_recompiler/ir/compute_value/do_composite.cpp index 41cbd04f9..b3120b485 100644 --- a/src/shader_recompiler/ir/compute_value/do_composite.cpp +++ b/src/shader_recompiler/ir/compute_value/do_composite.cpp @@ -6,224 +6,324 @@ namespace Shader::IR::ComputeValue { -static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { - const auto op = [](const ImmValue& a, const ImmValue& b) { - return ImmValue(a, b); - }; +static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1) { + const auto op = [](const ImmValue& a, const ImmValue& b) { return ImmValue(a, b); }; Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1); } -static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2) { +static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2) { const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c) { return ImmValue(a, b, c); }; - Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1, arg2); + Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1, + arg2); } -static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3) { +static void CommonCompositeConstruct(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2, + const ImmValueList& arg3) { const auto op = [](const ImmValue& a, const ImmValue& b, const ImmValue& c, const ImmValue& d) { return ImmValue(a, b, c, d); }; - Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1, arg2, arg3); + Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1, + arg2, arg3); } -void DoCompositeConstructU32x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { +void DoCompositeConstructU32x2(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1) { CommonCompositeConstruct(inst_values, arg0, arg1); } -void DoCompositeConstructU32x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2) { +void DoCompositeConstructU32x3(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2) { CommonCompositeConstruct(inst_values, arg0, arg1, arg2); } -void DoCompositeConstructU32x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3) { +void DoCompositeConstructU32x4(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2, + const ImmValueList& arg3) { 
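// (Editor's note, not part of the patch.) Each CommonCompositeConstruct overload goes through Common::CartesianInvoke, which builds one composite ImmValue per combination of the incoming argument lists, so inst_values grows with the product of the input list sizes.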
CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3); } -void DoCompositeConstructU32x2x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { - Common::CartesianInvoke(ImmValue::CompositeFrom2x2, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1); +void DoCompositeConstructU32x2x2(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1) { + Common::CartesianInvoke(ImmValue::CompositeFrom2x2, + std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1); } -void DoCompositeExtractU32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +void DoCompositeExtractU32x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, + std::insert_iterator(inst_values, inst_values.begin()), vec, idx); } -void DoCompositeExtractU32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +void DoCompositeExtractU32x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, + std::insert_iterator(inst_values, inst_values.begin()), vec, idx); } -void DoCompositeExtractU32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +void DoCompositeExtractU32x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, + std::insert_iterator(inst_values, inst_values.begin()), vec, idx); } -void DoCompositeInsertU32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +void DoCompositeInsertU32x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, + std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); } -void DoCompositeInsertU32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +void DoCompositeInsertU32x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, + std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); } -void DoCompositeInsertU32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +void DoCompositeInsertU32x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, + std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); } -void DoCompositeShuffleU32x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const 
ImmValueList& idx1) { +void DoCompositeShuffleU32x2(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1) { UNREACHABLE_MSG("Unimplemented"); } -void DoCompositeShuffleU32x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2) { +void DoCompositeShuffleU32x3(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2) { UNREACHABLE_MSG("Unimplemented"); } -void DoCompositeShuffleU32x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3) { +void DoCompositeShuffleU32x4(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2, + const ImmValueList& idx3) { UNREACHABLE_MSG("Unimplemented"); } -void DoCompositeConstructF16x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { +void DoCompositeConstructF16x2(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1) { CommonCompositeConstruct(inst_values, arg0, arg1); } -void DoCompositeConstructF16x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2) { +void DoCompositeConstructF16x3(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2) { CommonCompositeConstruct(inst_values, arg0, arg1, arg2); } -void DoCompositeConstructF16x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3) { +void DoCompositeConstructF16x4(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2, + const ImmValueList& arg3) { CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3); } -void DoCompositeConstructF32x2x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { - Common::CartesianInvoke(ImmValue::CompositeFrom2x2, std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1); +void DoCompositeConstructF32x2x2(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1) { + Common::CartesianInvoke(ImmValue::CompositeFrom2x2, + std::insert_iterator(inst_values, inst_values.begin()), arg0, arg1); } -void DoCompositeExtractF16x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +void DoCompositeExtractF16x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, + std::insert_iterator(inst_values, inst_values.begin()), vec, idx); } -void DoCompositeExtractF16x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +void DoCompositeExtractF16x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, + std::insert_iterator(inst_values, inst_values.begin()), vec, idx); } -void 
DoCompositeExtractF16x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +void DoCompositeExtractF16x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, + std::insert_iterator(inst_values, inst_values.begin()), vec, idx); } -void DoCompositeInsertF16x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +void DoCompositeInsertF16x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, + std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); } -void DoCompositeInsertF16x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +void DoCompositeInsertF16x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, + std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); } -void DoCompositeInsertF16x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +void DoCompositeInsertF16x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, + std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); } -void DoCompositeShuffleF16x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1) { +void DoCompositeShuffleF16x2(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1) { UNREACHABLE_MSG("Unimplemented"); } -void DoCompositeShuffleF16x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2) { +void DoCompositeShuffleF16x3(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2) { UNREACHABLE_MSG("Unimplemented"); } -void DoCompositeShuffleF16x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3) { +void DoCompositeShuffleF16x4(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2, + const ImmValueList& idx3) { UNREACHABLE_MSG("Unimplemented"); } -void DoCompositeConstructF32x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { +void DoCompositeConstructF32x2(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1) { CommonCompositeConstruct(inst_values, arg0, arg1); } -void DoCompositeConstructF32x3(ImmValueList& inst_values, 
const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2) { +void DoCompositeConstructF32x3(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2) { CommonCompositeConstruct(inst_values, arg0, arg1, arg2); } -void DoCompositeConstructF32x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3) { +void DoCompositeConstructF32x4(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2, + const ImmValueList& arg3) { CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3); } -void DoCompositeExtractF32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +void DoCompositeExtractF32x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, + std::insert_iterator(inst_values, inst_values.begin()), vec, idx); } -void DoCompositeExtractF32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +void DoCompositeExtractF32x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, + std::insert_iterator(inst_values, inst_values.begin()), vec, idx); } -void DoCompositeExtractF32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +void DoCompositeExtractF32x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, + std::insert_iterator(inst_values, inst_values.begin()), vec, idx); } -void DoCompositeInsertF32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +void DoCompositeInsertF32x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, + std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); } -void DoCompositeInsertF32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +void DoCompositeInsertF32x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, + std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); } -void DoCompositeInsertF32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +void DoCompositeInsertF32x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, + std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); } -void 
DoCompositeShuffleF32x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1) { +void DoCompositeShuffleF32x2(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1) { UNREACHABLE_MSG("Unimplemented"); } -void DoCompositeShuffleF32x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2) { +void DoCompositeShuffleF32x3(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2) { UNREACHABLE_MSG("Unimplemented"); } -void DoCompositeShuffleF32x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3) { +void DoCompositeShuffleF32x4(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2, + const ImmValueList& idx3) { UNREACHABLE_MSG("Unimplemented"); } -void DoCompositeConstructF64x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1) { +void DoCompositeConstructF64x2(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1) { CommonCompositeConstruct(inst_values, arg0, arg1); } -void DoCompositeConstructF64x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2) { +void DoCompositeConstructF64x3(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2) { CommonCompositeConstruct(inst_values, arg0, arg1, arg2); } -void DoCompositeConstructF64x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3) { +void DoCompositeConstructF64x4(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2, + const ImmValueList& arg3) { CommonCompositeConstruct(inst_values, arg0, arg1, arg2, arg3); } -void DoCompositeExtractF64x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +void DoCompositeExtractF64x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, + std::insert_iterator(inst_values, inst_values.begin()), vec, idx); } -void DoCompositeExtractF64x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +void DoCompositeExtractF64x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, + std::insert_iterator(inst_values, inst_values.begin()), vec, idx); } -void DoCompositeExtractF64x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Extract, std::insert_iterator(inst_values, inst_values.begin()), vec, idx); +void DoCompositeExtractF64x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Extract, + 
std::insert_iterator(inst_values, inst_values.begin()), vec, idx); } -void DoCompositeInsertF64x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +void DoCompositeInsertF64x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, + std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); } -void DoCompositeInsertF64x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +void DoCompositeInsertF64x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, + std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); } -void DoCompositeInsertF64x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx) { - Common::CartesianInvoke(ImmValue::Insert, std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); +void DoCompositeInsertF64x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx) { + Common::CartesianInvoke(ImmValue::Insert, + std::insert_iterator(inst_values, inst_values.begin()), vec, val, idx); } -void DoCompositeShuffleF64x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1) { +void DoCompositeShuffleF64x2(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1) { UNREACHABLE_MSG("Unimplemented"); } -void DoCompositeShuffleF64x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2) { +void DoCompositeShuffleF64x3(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2) { UNREACHABLE_MSG("Unimplemented"); } -void DoCompositeShuffleF64x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3) { +void DoCompositeShuffleF64x4(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2, + const ImmValueList& idx3) { UNREACHABLE_MSG("Unimplemented"); } diff --git a/src/shader_recompiler/ir/compute_value/do_composite.h b/src/shader_recompiler/ir/compute_value/do_composite.h index a55fd0fd8..b15f02141 100644 --- a/src/shader_recompiler/ir/compute_value/do_composite.h +++ b/src/shader_recompiler/ir/compute_value/do_composite.h @@ -7,58 +7,128 @@ namespace Shader::IR::ComputeValue { -void DoCompositeConstructU32x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1); -void DoCompositeConstructU32x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2); -void DoCompositeConstructU32x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& 
arg2, const ImmValueList& arg3); -void DoCompositeConstructU32x2x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1); -void DoCompositeExtractU32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); -void DoCompositeExtractU32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); -void DoCompositeExtractU32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); -void DoCompositeInsertU32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); -void DoCompositeInsertU32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); -void DoCompositeInsertU32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); -void DoCompositeShuffleU32x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1); -void DoCompositeShuffleU32x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2); -void DoCompositeShuffleU32x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3); +void DoCompositeConstructU32x2(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1); +void DoCompositeConstructU32x3(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2); +void DoCompositeConstructU32x4(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2, + const ImmValueList& arg3); +void DoCompositeConstructU32x2x2(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1); +void DoCompositeExtractU32x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx); +void DoCompositeExtractU32x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx); +void DoCompositeExtractU32x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx); +void DoCompositeInsertU32x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertU32x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertU32x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx); +void DoCompositeShuffleU32x2(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1); +void DoCompositeShuffleU32x3(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2); +void DoCompositeShuffleU32x4(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2, + const ImmValueList& idx3); -void DoCompositeConstructF16x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1); -void DoCompositeConstructF16x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2); -void 
DoCompositeConstructF16x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3); -void DoCompositeExtractF16x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); -void DoCompositeExtractF16x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); -void DoCompositeExtractF16x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); -void DoCompositeInsertF16x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); -void DoCompositeInsertF16x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); -void DoCompositeInsertF16x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); -void DoCompositeShuffleF16x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1); -void DoCompositeShuffleF16x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2); -void DoCompositeShuffleF16x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3); +void DoCompositeConstructF16x2(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1); +void DoCompositeConstructF16x3(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2); +void DoCompositeConstructF16x4(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2, + const ImmValueList& arg3); +void DoCompositeExtractF16x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx); +void DoCompositeExtractF16x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx); +void DoCompositeExtractF16x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx); +void DoCompositeInsertF16x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertF16x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertF16x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx); +void DoCompositeShuffleF16x2(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1); +void DoCompositeShuffleF16x3(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2); +void DoCompositeShuffleF16x4(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2, + const ImmValueList& idx3); -void DoCompositeConstructF32x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1); -void DoCompositeConstructF32x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2); -void DoCompositeConstructF32x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& 
arg2, const ImmValueList& arg3); -void DoCompositeConstructF32x2x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1); -void DoCompositeExtractF32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); -void DoCompositeExtractF32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); -void DoCompositeExtractF32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); -void DoCompositeInsertF32x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); -void DoCompositeInsertF32x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); -void DoCompositeInsertF32x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); -void DoCompositeShuffleF32x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1); -void DoCompositeShuffleF32x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2); -void DoCompositeShuffleF32x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3); +void DoCompositeConstructF32x2(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1); +void DoCompositeConstructF32x3(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2); +void DoCompositeConstructF32x4(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2, + const ImmValueList& arg3); +void DoCompositeConstructF32x2x2(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1); +void DoCompositeExtractF32x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx); +void DoCompositeExtractF32x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx); +void DoCompositeExtractF32x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx); +void DoCompositeInsertF32x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertF32x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertF32x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx); +void DoCompositeShuffleF32x2(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1); +void DoCompositeShuffleF32x3(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2); +void DoCompositeShuffleF32x4(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2, + const ImmValueList& idx3); -void DoCompositeConstructF64x2(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1); -void DoCompositeConstructF64x3(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2); -void 
DoCompositeConstructF64x4(ImmValueList& inst_values, const ImmValueList& arg0, const ImmValueList& arg1, const ImmValueList& arg2, const ImmValueList& arg3); -void DoCompositeExtractF64x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); -void DoCompositeExtractF64x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); -void DoCompositeExtractF64x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& idx); -void DoCompositeInsertF64x2(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); -void DoCompositeInsertF64x3(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); -void DoCompositeInsertF64x4(ImmValueList& inst_values, const ImmValueList& vec, const ImmValueList& val, const ImmValueList& idx); -void DoCompositeShuffleF64x2(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1); -void DoCompositeShuffleF64x3(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2); -void DoCompositeShuffleF64x4(ImmValueList& inst_values, const ImmValueList& vec0, const ImmValueList& vec1, const ImmValueList& idx0, const ImmValueList& idx1, const ImmValueList& idx2, const ImmValueList& idx3); +void DoCompositeConstructF64x2(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1); +void DoCompositeConstructF64x3(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2); +void DoCompositeConstructF64x4(ImmValueList& inst_values, const ImmValueList& arg0, + const ImmValueList& arg1, const ImmValueList& arg2, + const ImmValueList& arg3); +void DoCompositeExtractF64x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx); +void DoCompositeExtractF64x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx); +void DoCompositeExtractF64x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& idx); +void DoCompositeInsertF64x2(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertF64x3(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx); +void DoCompositeInsertF64x4(ImmValueList& inst_values, const ImmValueList& vec, + const ImmValueList& val, const ImmValueList& idx); +void DoCompositeShuffleF64x2(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1); +void DoCompositeShuffleF64x3(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2); +void DoCompositeShuffleF64x4(ImmValueList& inst_values, const ImmValueList& vec0, + const ImmValueList& vec1, const ImmValueList& idx0, + const ImmValueList& idx1, const ImmValueList& idx2, + const ImmValueList& idx3); } // namespace Shader::IR::ComputeValue diff --git a/src/shader_recompiler/ir/compute_value/do_nop_functions.h b/src/shader_recompiler/ir/compute_value/do_nop_functions.h index 69acced68..716478e00 100644 --- a/src/shader_recompiler/ir/compute_value/do_nop_functions.h +++ b/src/shader_recompiler/ir/compute_value/do_nop_functions.h @@ -3,7 +3,8 @@ namespace 
Shader::IR::ComputeValue { -#define NOP_FUNCTION(name) inline void Do##name(ImmValueList& inst_values) {} +#define NOP_FUNCTION(name) \ + inline void Do##name(ImmValueList& inst_values) {} NOP_FUNCTION(Phi) NOP_FUNCTION(Identity) From da65ae3bcb1c25fa846eb44efc1f0ad175452bf4 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 20 Mar 2025 12:48:36 +0100 Subject: [PATCH 24/49] Num executions --- CMakeLists.txt | 2 + .../ir/compute_value/compute.cpp | 7 +-- .../ir/compute_value/compute.h | 4 +- src/shader_recompiler/ir/num_executions.cpp | 61 +++++++++++++++++++ src/shader_recompiler/ir/num_executions.h | 16 +++++ 5 files changed, 84 insertions(+), 6 deletions(-) create mode 100644 src/shader_recompiler/ir/num_executions.cpp create mode 100644 src/shader_recompiler/ir/num_executions.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 26c03c301..e56ff15eb 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -875,6 +875,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/ir_emitter.cpp src/shader_recompiler/ir/ir_emitter.h src/shader_recompiler/ir/microinstruction.cpp + src/shader_recompiler/ir/num_executions.cpp + src/shader_recompiler/ir/num_executions.h src/shader_recompiler/ir/opcodes.cpp src/shader_recompiler/ir/opcodes.h src/shader_recompiler/ir/opcodes.inc diff --git a/src/shader_recompiler/ir/compute_value/compute.cpp b/src/shader_recompiler/ir/compute_value/compute.cpp index ad01ae799..f2cb9007b 100644 --- a/src/shader_recompiler/ir/compute_value/compute.cpp +++ b/src/shader_recompiler/ir/compute_value/compute.cpp @@ -23,7 +23,7 @@ static void Invoke(ImmValueList& inst_values, const std::array<ImmValueList*, N>& args, -static void Invoke(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& cache) { +static void Invoke(Inst* inst, ImmValueList& inst_values, Cache& cache) { using Traits = Common::FuncTraits<decltype(func)>; constexpr size_t num_args = Traits::NUM_ARGS - 1; ASSERT(inst->NumArgs() >= num_args); @@ -34,8 +34,7 @@ static void Invoke(Inst* inst, ImmValueList& inst_values, ComputeImmValuesCache& Invoke(inst_values, args, std::make_index_sequence<num_args>{}); } -static void DoInstructionOperation(Inst* inst, ImmValueList& inst_values, - ComputeImmValuesCache& cache) { +static void DoInstructionOperation(Inst* inst, ImmValueList& inst_values, Cache& cache) { switch (inst->GetOpcode()) { #define OPCODE(name, result_type, ...)
\ case Opcode::name: \ @@ -64,7 +63,7 @@ static bool IsSelectInst(Inst* inst) { } } -void Compute(const Value& value, ImmValueList& values, ComputeImmValuesCache& cache) { +void Compute(const Value& value, ImmValueList& values, Cache& cache) { Value resolved = value.Resolve(); if (ImmValue::IsSupportedValue(resolved)) { values.insert(ImmValue(resolved)); diff --git a/src/shader_recompiler/ir/compute_value/compute.h b/src/shader_recompiler/ir/compute_value/compute.h index b98b4ecae..57907c3c6 100644 --- a/src/shader_recompiler/ir/compute_value/compute.h +++ b/src/shader_recompiler/ir/compute_value/compute.h @@ -15,8 +15,8 @@ namespace Shader::IR::ComputeValue { using ImmValueList = std::unordered_set<ImmValue>; -using ComputeImmValuesCache = boost::container::flat_map<Inst*, ImmValueList>; +using Cache = boost::container::flat_map<Inst*, ImmValueList>; -void Compute(const Value& value, ImmValueList& values, ComputeImmValuesCache& cache); +void Compute(const Value& value, ImmValueList& values, Cache& cache); } // namespace Shader::IR::ComputeValue \ No newline at end of file diff --git a/src/shader_recompiler/ir/num_executions.cpp b/src/shader_recompiler/ir/num_executions.cpp new file mode 100644 index 000000000..4c79135d7 --- /dev/null +++ b/src/shader_recompiler/ir/num_executions.cpp @@ -0,0 +1,61 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/cartesian_invoke.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/compute_value/compute.h" +#include "shader_recompiler/ir/num_executions.h" + +namespace Shader::IR { + +static bool Is64BitCondition(const Inst* inst) { + switch (inst->GetOpcode()) { + case Opcode::SLessThan64: + case Opcode::ULessThan64: + case Opcode::IEqual64: + case Opcode::INotEqual64: + return true; + default: + return false; + } +} + +static u64 GetDistance32(const ComputeValue::ImmValue& a, const ComputeValue::ImmValue& b) { + return a.U32() < b.U32() ? b.U32() - a.U32() : a.U32() - b.U32(); } + +static u64 GetDistance64(const ComputeValue::ImmValue& a, const ComputeValue::ImmValue& b) { + return a.U64() < b.U64() ?
b.U64() - a.U64() : a.U64() - b.U64(); } + +u64 GetNumExecutions(const Inst* inst) { + u64 num_executions = 1; + const auto* cond_data = &inst->GetParent()->CondData(); + while (cond_data->asl_node) { + if (cond_data->asl_node->type == AbstractSyntaxNode::Type::Loop) { + ComputeValue::ImmValueList cond_arg0, cond_arg1; + ComputeValue::Cache cache; + Block* cont_block = cond_data->asl_node->data.loop.continue_block; + Inst* cond_inst = cont_block->back().Arg(0).InstRecursive(); + ASSERT(cond_inst); + ComputeValue::Compute(cond_inst->Arg(0), cond_arg0, cache); + ComputeValue::Compute(cond_inst->Arg(1), cond_arg1, cache); + std::unordered_set<u64> distances; + if (Is64BitCondition(cond_inst)) { + Common::CartesianInvoke(GetDistance64, + std::insert_iterator(distances, distances.end()), cond_arg0, + cond_arg1); + } else { + Common::CartesianInvoke(GetDistance32, + std::insert_iterator(distances, distances.end()), cond_arg0, + cond_arg1); + } + num_executions *= + std::max<u64>(1, *std::max_element(distances.begin(), distances.end())); + } + cond_data = cond_data->parent; + } + return num_executions; +} + +} // namespace Shader::IR \ No newline at end of file diff --git a/src/shader_recompiler/ir/num_executions.h b/src/shader_recompiler/ir/num_executions.h new file mode 100644 index 000000000..68ade024d --- /dev/null +++ b/src/shader_recompiler/ir/num_executions.h @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/types.h" +#include "shader_recompiler/ir/type.h" + +// Get the number of times an instruction will be executed. +// 0 if it cannot be determined statically. + +namespace Shader::IR { + +u64 GetNumExecutions(const Inst* inst); + +} // namespace Shader::IR From 52650d2c555c6b560a854f50dde54e1410d7a536 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 8 Apr 2025 02:34:45 +0200 Subject: [PATCH 25/49] Finish IR --- .../backend/spirv/emit_spirv_instructions.h | 1 + .../backend/spirv/emit_spirv_special.cpp | 4 + src/shader_recompiler/ir/basic_block.cpp | 6 + src/shader_recompiler/ir/basic_block.h | 3 + .../ir/compute_value/do_nop_functions.h | 1 + src/shader_recompiler/ir/ir_emitter.cpp | 8 + src/shader_recompiler/ir/ir_emitter.h | 4 + src/shader_recompiler/ir/microinstruction.cpp | 1 + src/shader_recompiler/ir/opcodes.inc | 1 + .../passes/flatten_extended_userdata_pass.cpp | 201 +++++++++--------- src/shader_recompiler/ir/passes/ir_passes.h | 3 +- src/shader_recompiler/ir/program.cpp | 7 +- src/shader_recompiler/ir/program.h | 2 +- src/shader_recompiler/ir/srt_gvn_table.h | 14 -- src/shader_recompiler/ir/subprogram.cpp | 73 +++++-- src/shader_recompiler/ir/subprogram.h | 4 +- src/shader_recompiler/recompiler.cpp | 2 +- 17 files changed, 189 insertions(+), 146 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index fb37799f5..a8901d8f6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -47,6 +47,7 @@ void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); void EmitDiscard(EmitContext& ctx); void EmitDiscardCond(EmitContext& ctx, Id condition); +void EmitStoreFlatbuf(EmitContext& ctx, const IR::Value& data, const IR::Value& offset); void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2, Id arg3, Id arg4); void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index fe7bd3356..f48c76395 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -102,6 +102,10 @@ void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { throw NotImplementedException("Geometry streams"); } +void EmitStoreFlatbuf(EmitContext& ctx, const IR::Value& data, const IR::Value& offset) { + UNREACHABLE_MSG("StoreFlatbuf not intended for SPIR-V"); +} + void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id fmt, Id arg0, Id arg1, Id arg2, Id arg3) { IR::DebugPrintFlags flags = inst->Flags<IR::DebugPrintFlags>(); std::array fmt_args = {arg0, arg1, arg2, arg3}; diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index a312eabde..6e9062254 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -23,6 +23,12 @@ Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base return instructions.insert(insertion_point, *inst); } +Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, u32 flags) { + Inst* const inst{inst_pool->Create(op, flags)}; + inst->SetParent(this); + return instructions.insert(insertion_point, *inst); +} + Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args, u32 flags) { Inst* const inst{inst_pool->Create(op, flags)}; diff --git a/src/shader_recompiler/ir/basic_block.h b/src/shader_recompiler/ir/basic_block.h index 865243835..3c74b1133 100644 --- a/src/shader_recompiler/ir/basic_block.h +++ b/src/shader_recompiler/ir/basic_block.h @@ -47,6 +47,9 @@ public: /// Prepends a copy of an instruction to this basic block before the insertion point. iterator PrependNewInst(iterator insertion_point, const Inst& base_inst); + /// Prepends a new instruction to this basic block before the insertion point (without args). + iterator PrependNewInst(iterator insertion_point, Opcode op, u32 flags); + /// Prepends a new instruction to this basic block before the insertion point.
iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args = {}, u32 flags = 0); diff --git a/src/shader_recompiler/ir/compute_value/do_nop_functions.h b/src/shader_recompiler/ir/compute_value/do_nop_functions.h index 716478e00..8b88742a1 100644 --- a/src/shader_recompiler/ir/compute_value/do_nop_functions.h +++ b/src/shader_recompiler/ir/compute_value/do_nop_functions.h @@ -17,6 +17,7 @@ NOP_FUNCTION(Prologue) NOP_FUNCTION(Epilogue) NOP_FUNCTION(Discard) NOP_FUNCTION(DiscardCond) +NOP_FUNCTION(StoreFlatbuf) NOP_FUNCTION(DebugPrint) NOP_FUNCTION(ReadConst) diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index a171d32a2..77e12c30c 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -102,6 +102,10 @@ void IREmitter::Reference(const Value& value) { Inst(Opcode::Reference, value); } +Value IREmitter::Phi(IR::Type type) { + return Inst(Opcode::Phi, Flags(type)); +} + void IREmitter::PhiMove(IR::Inst& phi, const Value& value) { Inst(Opcode::PhiMove, Value{&phi}, value); } @@ -1970,6 +1974,10 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32& return Inst(Opcode::CubeFaceIndex, cube_coords); } +void IREmitter::StoreFlatbuf(const U32& data, const U32& offset) { + Inst(Opcode::StoreFlatbuf, data, offset); +} + // Debug print maps to SPIRV's NonSemantic DebugPrintf instruction // Renderdoc will hook in its own implementation of the SPIRV instruction // Renderdoc accepts format specifiers, e.g. %u, listed here: diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 48cc02725..b982f1f91 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -18,6 +18,8 @@ namespace Shader::IR { class IREmitter { public: explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} + explicit IREmitter(Inst& inst) + : block{inst.GetParent()}, insertion_point{Block::InstructionList::s_iterator_to(inst)} {} explicit IREmitter(Block& block_, Block::iterator insertion_point_) : block{&block_}, insertion_point{insertion_point_} {} @@ -39,12 +41,14 @@ public: U1 ConditionRef(const U1& value); void Reference(const Value& value); + [[nodiscard]] Value Phi(IR::Type type); void PhiMove(IR::Inst& phi, const Value& value); void Prologue(); void Epilogue(); void Discard(); void Discard(const U1& cond); + void StoreFlatbuf(const U32& data, const U32& offset); void DebugPrint(const char* fmt, boost::container::small_vector args); void Barrier(); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 580156f5b..45b0f3de0 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -100,6 +100,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::ImageAtomicOr32: case Opcode::ImageAtomicXor32: case Opcode::ImageAtomicExchange32: + case Opcode::StoreFlatbuf: case Opcode::DebugPrint: case Opcode::EmitVertex: case Opcode::EmitPrimitive: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 93d759b74..f30c1ee67 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -14,6 +14,7 @@ OPCODE(Prologue, Void, OPCODE(Epilogue, Void, ) OPCODE(Discard, Void, ) OPCODE(DiscardCond, Void, U1, ) +OPCODE(StoreFlatbuf, Void, U32, U32 ) OPCODE(DebugPrint, Void, StringLiteral, Opaque, Opaque,
Opaque, Opaque, ) // Constant memory operations diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index bbf3fe8fb..7aa8283eb 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -12,11 +12,15 @@ #include "common/path_util.h" #include "shader_recompiler/info.h" #include "shader_recompiler/ir/breadth_first_search.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/num_executions.h" #include "shader_recompiler/ir/opcodes.h" +#include "shader_recompiler/ir/passes/ir_passes.h" #include "shader_recompiler/ir/passes/srt.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/srt_gvn_table.h" +#include "shader_recompiler/ir/subprogram.h" #include "shader_recompiler/ir/value.h" #include "src/common/arch.h" #include "src/common/decoder.h" @@ -57,28 +61,23 @@ static void DumpSrtProgram(const Shader::Info& info, const u8* code, size_t code using namespace Shader; struct PassInfo { - // map offset to inst - using PtrUserList = boost::container::flat_map; + struct ReadConstData { + u32 offset_dw; + u32 count_dw; + IR::Inst* unique_inst; + IR::Inst* original_inst; + }; Optimization::SrtGvnTable gvn_table; - // keys are GetUserData or ReadConst instructions that are used as pointers - std::unordered_map pointer_uses; - // GetUserData instructions corresponding to sgpr_base of SRT roots - boost::container::small_flat_map srt_roots; - // pick a single inst for a given value number std::unordered_map vn_to_inst; + // map of all readconsts to their subprogram insts + boost::container::small_flat_map all_readconsts; + // subprogram insts mapped to their readconst data + boost::container::small_flat_map readconst_data; - // Bumped during codegen to assign offsets to readconsts - u32 dst_off_dw; - - PtrUserList* GetUsesAsPointer(IR::Inst* inst) { - auto it = pointer_uses.find(inst); - if (it != pointer_uses.end()) { - return &it->second; - } - return nullptr; - } + // Incremented during SRT program generation + u32 dst_off_dw = 0; // Return a single instruction that this instruction is identical to, according // to value number @@ -105,39 +104,79 @@ static inline void PopPtr(Xbyak::CodeGenerator& c) { c.pop(rdi); }; -static void VisitPointer(u32 off_dw, IR::Inst* subtree, PassInfo& pass_info, - Xbyak::CodeGenerator& c) { - PushPtr(c, off_dw); - PassInfo::PtrUserList* use_list = pass_info.GetUsesAsPointer(subtree); - ASSERT(use_list); - - // First copy all the src data from this tree level - // That way, all data that was contiguous in the guest SRT is also contiguous in the - // flattened buffer. - // TODO src and dst are contiguous. 
Optimize with wider loads/stores - TODO if this subtree is dynamically indexed, don't compact it (keep it sparse) - for (auto [src_off_dw, use] : *use_list) { - c.mov(r10d, ptr[rdi + (src_off_dw << 2)]); - c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r10d); - - use->SetFlags(pass_info.dst_off_dw); - pass_info.dst_off_dw++; +static IR::U32 WrapInstWithCounter(IR::Inst* inst, u32 initial_value, IR::Block* first_block) { + const IR::Block::ConditionalData* loop_data = &inst->GetParent()->CondData(); + while (loop_data != nullptr && + loop_data->asl_node->type != IR::AbstractSyntaxNode::Type::Loop) { + loop_data = loop_data->parent; } - - // Then visit any children used as pointers - for (const auto [src_off_dw, use] : *use_list) { - if (pass_info.GetUsesAsPointer(use)) { - VisitPointer(src_off_dw, use, pass_info, c); - } - } - - PopPtr(c); + ASSERT(loop_data != nullptr); + IR::Block* loop_body = loop_data->asl_node->data.loop.body; + // We are putting the Phi node in the loop header so that the counter is + // incremented each time the loop is executed. We point the Phi node to the + // first block so that the counter is not reset each time the loop is + // executed (nested loops) + IR::IREmitter ir_inst(*inst->GetParent(), ++IR::Block::InstructionList::s_iterator_to(*inst)); + IR::IREmitter ir_loop_header(*loop_body->ImmPredecessors().front()); + IR::Inst* phi = ir_loop_header.Phi(IR::Type::U32).Inst(); + IR::U32 inc = ir_inst.IAdd(IR::U32(phi), ir_inst.Imm32(1)); + phi->AddPhiOperand(first_block, ir_loop_header.Imm32(initial_value)); + phi->AddPhiOperand(inst->GetParent(), inc); + return IR::U32(phi); } -static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { - Xbyak::CodeGenerator& c = g_srt_codegen; +static void GenerateSrtReadConsts(IR::Program& program, PassInfo& pass_info, Pools& pools) { + IR::SubProgram sub_gen(&program, pools); + for (auto& [inst, sub_inst] : pass_info.all_readconsts) { + sub_inst = sub_gen.AddInst(inst); + pass_info.readconst_data[sub_inst] = {0, 0, pass_info.DeduplicateInstruction(sub_inst), + inst}; + } + IR::Program sub_program = sub_gen.GetSubProgram(); + IR::Block* original_first_block = program.blocks.front(); + IR::Block* sub_first_block = sub_program.blocks.front(); + for (auto& [inst, data] : pass_info.readconst_data) { + if (inst != data.unique_inst) { + PassInfo::ReadConstData& unique_data = pass_info.readconst_data[data.unique_inst]; + data.offset_dw = unique_data.offset_dw; + // In this context, count_dw is always the same as unique_data.count_dw + // There are no duplicate instructions in different loops + data.count_dw = unique_data.count_dw; + } else { + u32 count = static_cast<u32>(IR::GetNumExecutions(inst)); + ASSERT_MSG(count > 0, "Dynamic loop range not supported yet"); + data.count_dw = count; + data.offset_dw = pass_info.dst_off_dw; + pass_info.dst_off_dw += count; + IR::U32 save_offset; + if (data.count_dw > 1) { + save_offset = WrapInstWithCounter(inst, data.offset_dw, sub_first_block); + } else { + IR::IREmitter ir(*inst); + save_offset = ir.Imm32(data.offset_dw); + } + IR::IREmitter ir(*inst->GetParent(), + ++IR::Block::InstructionList::s_iterator_to(*inst)); + ir.StoreFlatbuf(IR::U32(inst), save_offset); + } + if (data.count_dw > 1) { + IR::U32 counter = + WrapInstWithCounter(data.original_inst, data.offset_dw, original_first_block); + data.original_inst->SetArg(1, counter); + } else { + IR::IREmitter ir(*data.original_inst); + data.original_inst->SetArg(1, ir.Imm32(data.offset_dw)); + } + } + DeadCodeEliminationPass(sub_program);
+ IR::DumpProgram(sub_program, sub_program.info, "srt"); +} - if (info.srt_info.srt_reservations.empty() && pass_info.srt_roots.empty()) { +static void GenerateSrtProgram(IR::Program& program, PassInfo& pass_info, Pools& pools) { + Xbyak::CodeGenerator& c = g_srt_codegen; + Shader::Info& info = program.info; + + if (info.srt_info.srt_reservations.empty() && pass_info.all_readconsts.empty()) { return; } @@ -167,10 +206,12 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw); - for (const auto& [sgpr_base, root] : pass_info.srt_roots) { - VisitPointer(static_cast(sgpr_base), root, pass_info, c); + if (!pass_info.all_readconsts.empty()) { + GenerateSrtReadConsts(program, pass_info, pools); } + info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw; + c.ret(); c.ready(); @@ -178,75 +219,25 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { size_t codesize = c.getCurr() - reinterpret_cast(info.srt_info.walker_func); DumpSrtProgram(info, reinterpret_cast(info.srt_info.walker_func), codesize); } - - info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw; } }; // namespace -void FlattenExtendedUserdataPass(IR::Program& program) { +void FlattenExtendedUserdataPass(IR::Program& program, Pools& pools) { Shader::Info& info = program.info; PassInfo pass_info; - // traverse at end and assign offsets to duplicate readconsts, using - // vn_to_inst as the source - boost::container::small_vector all_readconsts; - - for (auto r_it = program.post_order_blocks.rbegin(); r_it != program.post_order_blocks.rend(); - r_it++) { - IR::Block* block = *r_it; - for (IR::Inst& inst : *block) { + for (auto it = program.post_order_blocks.rbegin(); it != program.post_order_blocks.rend(); + ++it) { + IR::Block* block = *it; + for (auto& inst : block->Instructions()) { if (inst.GetOpcode() == IR::Opcode::ReadConst) { - if (!inst.Arg(1).IsImmediate()) { - LOG_WARNING(Render_Recompiler, "ReadConst has non-immediate offset"); - continue; - } - - all_readconsts.push_back(&inst); - if (pass_info.DeduplicateInstruction(&inst) != &inst) { - // This is a duplicate of a readconst we've already visited - continue; - } - - IR::Inst* ptr_composite = inst.Arg(0).InstRecursive(); - - const auto pred = [](IR::Inst* inst) -> std::optional { - if (inst->GetOpcode() == IR::Opcode::GetUserData || - inst->GetOpcode() == IR::Opcode::ReadConst) { - return inst; - } - return std::nullopt; - }; - auto base0 = IR::BreadthFirstSearch(ptr_composite->Arg(0), pred); - auto base1 = IR::BreadthFirstSearch(ptr_composite->Arg(1), pred); - ASSERT_MSG(base0 && base1, "ReadConst not from constant memory"); - - IR::Inst* ptr_lo = base0.value(); - ptr_lo = pass_info.DeduplicateInstruction(ptr_lo); - - auto ptr_uses_kv = - pass_info.pointer_uses.try_emplace(ptr_lo, PassInfo::PtrUserList{}); - PassInfo::PtrUserList& user_list = ptr_uses_kv.first->second; - - user_list[inst.Arg(1).U32()] = &inst; - - if (ptr_lo->GetOpcode() == IR::Opcode::GetUserData) { - IR::ScalarReg ud_reg = ptr_lo->Arg(0).ScalarReg(); - pass_info.srt_roots[ud_reg] = ptr_lo; - } + pass_info.all_readconsts[&inst] = nullptr; } } } - GenerateSrtProgram(info, pass_info); - - // Assign offsets to duplicate readconsts - for (IR::Inst* readconst : all_readconsts) { - ASSERT(pass_info.vn_to_inst.contains(pass_info.gvn_table.GetValueNumber(readconst))); - IR::Inst* original = pass_info.DeduplicateInstruction(readconst); - readconst->SetFlags(original->Flags()); - } - + GenerateSrtProgram(program, 
pass_info, pools); info.RefreshFlatBuf(); } diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index 760dbb112..4b20f8c52 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -5,6 +5,7 @@ #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/pools.h" namespace Shader { struct Profile; @@ -16,7 +17,7 @@ void SsaRewritePass(IR::BlockList& program); void IdentityRemovalPass(IR::BlockList& program); void DeadCodeEliminationPass(IR::Program& program); void ConstantPropagationPass(IR::BlockList& program); -void FlattenExtendedUserdataPass(IR::Program& program); +void FlattenExtendedUserdataPass(IR::Program& program, Pools& pools); void ReadLaneEliminationPass(IR::Program& program); void ResourceTrackingPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program); diff --git a/src/shader_recompiler/ir/program.cpp b/src/shader_recompiler/ir/program.cpp index 4071c9ac9..f2f6e34fa 100644 --- a/src/shader_recompiler/ir/program.cpp +++ b/src/shader_recompiler/ir/program.cpp @@ -15,7 +15,7 @@ namespace Shader::IR { -void DumpProgram(const Program& program, const Info& info) { +void DumpProgram(const Program& program, const Info& info, const std::string& type) { using namespace Common::FS; if (!Config::dumpShaders()) { @@ -26,7 +26,8 @@ void DumpProgram(const Program& program, const Info& info) { if (!std::filesystem::exists(dump_dir)) { std::filesystem::create_directories(dump_dir); } - const auto ir_filename = fmt::format("{}_{:#018x}.irprogram.txt", info.stage, info.pgm_hash); + const auto ir_filename = + fmt::format("{}_{:#018x}.{}irprogram.txt", info.stage, info.pgm_hash, type); const auto ir_file = IOFile{dump_dir / ir_filename, FileAccessMode::Write, FileType::TextFile}; size_t index{0}; @@ -43,7 +44,7 @@ void DumpProgram(const Program& program, const Info& info) { ir_file.WriteString(s); } - const auto asl_filename = fmt::format("{}_{:#018x}.asl.txt", info.stage, info.pgm_hash); + const auto asl_filename = fmt::format("{}_{:#018x}.{}asl.txt", info.stage, info.pgm_hash, type); const auto asl_file = IOFile{dump_dir / asl_filename, FileAccessMode::Write, FileType::TextFile}; diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h index 9ede71215..3ffd4dc96 100644 --- a/src/shader_recompiler/ir/program.h +++ b/src/shader_recompiler/ir/program.h @@ -21,6 +21,6 @@ struct Program { Info& info; }; -void DumpProgram(const Program& program, const Info& info); +void DumpProgram(const Program& program, const Info& info, const std::string& type = ""); } // namespace Shader::IR diff --git a/src/shader_recompiler/ir/srt_gvn_table.h b/src/shader_recompiler/ir/srt_gvn_table.h index 3baa1c7da..295a86814 100644 --- a/src/shader_recompiler/ir/srt_gvn_table.h +++ b/src/shader_recompiler/ir/srt_gvn_table.h @@ -51,20 +51,6 @@ private: u32 vn; switch (inst->GetOpcode()) { - case IR::Opcode::Phi: { - const auto pred = [](IR::Inst* inst) -> std::optional { - if (inst->GetOpcode() == IR::Opcode::GetUserData || - inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 || - inst->GetOpcode() == IR::Opcode::ReadConst) { - return inst; - } - return std::nullopt; - }; - IR::Inst* source = IR::BreadthFirstSearch(inst, pred).value(); - vn = GetValueNumber(source); - value_numbers[IR::Value(inst)] = vn; - break; - } case IR::Opcode::GetUserData: case IR::Opcode::CompositeConstructU32x2: case IR::Opcode::ReadConst: { 
diff --git a/src/shader_recompiler/ir/subprogram.cpp b/src/shader_recompiler/ir/subprogram.cpp index ac69ec61e..bb944e3ef 100644 --- a/src/shader_recompiler/ir/subprogram.cpp +++ b/src/shader_recompiler/ir/subprogram.cpp @@ -23,24 +23,7 @@ Block* SubProgram::AddBlock(Block* orig_block) { } Inst* SubProgram::AddInst(Inst* orig_inst) { - auto it = orig_inst_to_inst.find(orig_inst); - if (it != orig_inst_to_inst.end()) { - return it->second; - } - Block* block = AddBlock(orig_inst->GetParent()); - Inst inst(orig_inst->GetOpcode(), orig_inst->Flags<u32>()); - if (orig_inst->GetOpcode() == Opcode::Phi) { - AddPhi(orig_inst, &inst); - } else { - for (size_t i = 0; i < orig_inst->NumArgs(); ++i) { - SetArg(&inst, i, orig_inst->Arg(i)); - } - } - auto insertion_point = block->end(); - if (block->back().GetOpcode() == Opcode::ConditionRef) { - --insertion_point; - } - return &(*block->PrependNewInst(insertion_point, inst)); + return AddInst(orig_inst, std::nullopt); } Block* SubProgram::GetBlock(Block* orig_block) { @@ -64,6 +47,7 @@ Program SubProgram::GetSubProgram() { completed = true; Program sub_program(super_program->info); BuildBlockListAndASL(sub_program); + AddPrologueAndEpilogue(sub_program); sub_program.post_order_blocks = PostOrder(sub_program.syntax_list.front()); AddConditionalTreeFromASL(sub_program.syntax_list); for (Block* block : sub_program.blocks) { @@ -72,6 +56,47 @@ Program SubProgram::GetSubProgram() { return sub_program; } +void SubProgram::AddPrologueAndEpilogue(Program& sub_program) { + // We may need to handle this better. + Block* epilogue_block = pools.block_pool.Create(pools.inst_pool); + Block* front_block = sub_program.blocks.front(); + sub_program.blocks.back()->AddBranch(epilogue_block); + sub_program.blocks.push_back(epilogue_block); + sub_program.syntax_list.push_back(AbstractSyntaxNode{.data = {.block = epilogue_block}, + .type = AbstractSyntaxNode::Type::Block}); + sub_program.syntax_list.push_back(AbstractSyntaxNode{.type = AbstractSyntaxNode::Type::Return}); + epilogue_block->AppendNewInst(Opcode::Epilogue, {}); + front_block->PrependNewInst(front_block->begin(), Opcode::Prologue); + epilogue_block->SsaSeal(); +} + +Inst* SubProgram::AddInst(Inst* orig_inst, + std::optional<Block::iterator> insertion_point) { + auto it = orig_inst_to_inst.find(orig_inst); + if (it != orig_inst_to_inst.end()) { + return it->second; + } + Block* block = AddBlock(orig_inst->GetParent()); + if (!insertion_point) { + if (block->back().GetOpcode() == Opcode::ConditionRef) { + insertion_point = --block->end(); + } else { + insertion_point = block->end(); + } + } + Inst* inst = &( + *block->PrependNewInst(*insertion_point, orig_inst->GetOpcode(), orig_inst->Flags<u32>())); + orig_inst_to_inst[orig_inst] = inst; + if (orig_inst->GetOpcode() == Opcode::Phi) { + AddPhi(orig_inst, inst); + } else { + for (size_t i = 0; i < orig_inst->NumArgs(); ++i) { + SetArg(inst, orig_inst, i); + } + } + return inst; +} + void SubProgram::AddPhi(Inst* orig_phi, Inst* phi) { // Current IR only has Phis with 2 arguments.
ASSERT(orig_phi->NumArgs() == 2); @@ -108,11 +133,18 @@ void SubProgram::AddPhi(Inst* orig_phi, Inst* phi) { } } -void SubProgram::SetArg(Inst* inst, size_t index, const Value& arg) { +void SubProgram::SetArg(Inst* inst, Inst* orig_inst, size_t index) { + const Value& arg = orig_inst->Arg(index); if (arg.IsImmediate()) { inst->SetArg(index, arg); } else { - inst->SetArg(index, Value(AddInst(arg.InstRecursive()))); + Inst* arg_inst = arg.InstRecursive(); + if (orig_inst->GetParent() == arg_inst->GetParent()) { + inst->SetArg(index, + Value(AddInst(arg_inst, Block::InstructionList::s_iterator_to(*inst)))); + } else { + inst->SetArg(index, Value(AddInst(arg_inst, std::nullopt))); + } } } @@ -216,6 +248,7 @@ void SubProgram::BuildBlockListAndASL(Program& sub_program) { break; } case AbstractSyntaxNode::Type::Unreachable: + case AbstractSyntaxNode::Type::Return: continue; default: break; diff --git a/src/shader_recompiler/ir/subprogram.h b/src/shader_recompiler/ir/subprogram.h index b14a31e3d..f2b61d411 100644 --- a/src/shader_recompiler/ir/subprogram.h +++ b/src/shader_recompiler/ir/subprogram.h @@ -27,12 +27,14 @@ struct SubProgram { Program GetSubProgram(); private: + Inst* AddInst(Inst* orig_inst, std::optional<Block::iterator> insertion_point); void AddPhi(Inst* orig_phi, Inst* phi); - void SetArg(Inst* inst, size_t index, const Value& arg); + void SetArg(Inst* inst, Inst* orig_inst, size_t index); void AddPhiOperand(Inst* phi, Block* block, const Value& arg); void BuildBlockListAndASL(Program& sub_program); + void AddPrologueAndEpilogue(Program& sub_program); bool completed = false; Program* super_program; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index b02ec706c..a28b508b9 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -75,7 +75,7 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info } Shader::Optimization::RingAccessElimination(program, runtime_info); Shader::Optimization::ReadLaneEliminationPass(program); - Shader::Optimization::FlattenExtendedUserdataPass(program); + Shader::Optimization::FlattenExtendedUserdataPass(program, pools); Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::LowerBufferFormatToRaw(program); Shader::Optimization::SharedMemoryToStoragePass(program, runtime_info, profile); From 67dd111dcc4991971376715bc10b480f54c4a6c7 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 24 Mar 2025 00:14:44 +0100 Subject: [PATCH 26/49] Fix TrackSharp --- .../ir/passes/resource_tracking_pass.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index c5bfe5796..0f6b1a150 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -244,16 +244,20 @@ SharpLocation TrackSharp(const IR::Inst* inst, const Shader::Info& info) { } return std::nullopt; }; + // We are not accounting for modifications made after the source.
const auto result = IR::BreadthFirstSearch(inst, pred); ASSERT_MSG(result, "Unable to track sharp source"); inst = result.value(); if (inst->GetOpcode() == IR::Opcode::GetUserData) { return static_cast<u32>(inst->Arg(0).ScalarReg()); - } else { - ASSERT_MSG(inst->GetOpcode() == IR::Opcode::ReadConst, - "Sharp load not from constant memory"); - return inst->Flags<u32>(); + } else if (inst->GetOpcode() == IR::Opcode::ReadConst) { + // Sharp is stored in the offset argument. + // The value is not immediate if ReadConst is inside of a loop + // and the offset is different in each iteration. (we don't support this) + ASSERT(inst->Arg(1).IsImmediate()); + return inst->Arg(1).U32(); } + UNREACHABLE_MSG("Sharp load not from constant memory or user data"); } s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, From 1f8ef4e1f651b5506251185bc07fea0a85e72163 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 26 Mar 2025 16:54:41 +0100 Subject: [PATCH 27/49] Fix GCC build --- src/shader_recompiler/ir/ir_emitter.h | 2 +- src/shader_recompiler/ir/passes/resource_tracking_pass.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index b982f1f91..14f06eef8 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -18,7 +18,7 @@ namespace Shader::IR { class IREmitter { public: explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} - explicit IREmitter(Inst& inst) + explicit IREmitter(IR::Inst& inst) : block{inst.GetParent()}, insertion_point{Block::InstructionList::s_iterator_to(inst)} {} explicit IREmitter(Block& block_, Block::iterator insertion_point_) : block{&block_}, insertion_point{insertion_point_} {} diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 0f6b1a150..b9640fafc 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -253,7 +253,7 @@ SharpLocation TrackSharp(const IR::Inst* inst, const Shader::Info& info) { } else if (inst->GetOpcode() == IR::Opcode::ReadConst) { // Sharp is stored in the offset argument. // The value is not immediate if ReadConst is inside of a loop - // and the offset is different in each iteration. (we don't support this) + // and the base or offset is different in each iteration.
(we don't support this) ASSERT(inst->Arg(1).IsImmediate()); return inst->Arg(1).U32(); } From faf479dcd5baa61e133cbcf94142ab3a6f00a8c0 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 26 Mar 2025 19:23:10 +0100 Subject: [PATCH 28/49] Adjust SPIR-V EmitReadConst --- .../backend/spirv/emit_spirv_context_get_set.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e4071bb95..541222163 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -163,13 +163,11 @@ void EmitGetGotoVariable(EmitContext&) { using BufferAlias = EmitContext::BufferAlias; Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) { - const u32 flatbuf_off_dw = inst->Flags(); const auto& srt_flatbuf = ctx.buffers.back(); - ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 && - srt_flatbuf.buffer_type == BufferType::ReadConstUbo); + ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo); const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32]; const Id ptr{ - ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(flatbuf_off_dw))}; + ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.Def(inst->Arg(1)))}; return ctx.OpLoad(ctx.U32[1], ptr); } From b6e940665201c2c35f46ec0763f310a38387b624 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 27 Mar 2025 01:03:52 +0100 Subject: [PATCH 29/49] Fixed ImmValue compute --- src/common/cartesian_invoke.h | 2 +- .../ir/compute_value/compute.cpp | 8 +- .../ir/compute_value/compute.h | 4 +- .../ir/compute_value/do_float_operations.cpp | 78 +++++++++---------- .../compute_value/do_integer_operations.cpp | 62 +++++++-------- .../compute_value/do_logical_operations.cpp | 8 +- .../ir/compute_value/imm_value.cpp | 4 + src/shader_recompiler/ir/num_executions.cpp | 7 +- 8 files changed, 89 insertions(+), 84 deletions(-) diff --git a/src/common/cartesian_invoke.h b/src/common/cartesian_invoke.h index 7a4162592..daaa10ce7 100644 --- a/src/common/cartesian_invoke.h +++ b/src/common/cartesian_invoke.h @@ -16,7 +16,7 @@ void CartesianInvokeImpl(Func func, OutputIt out_it, auto get_tuple = [&](std::index_sequence) { return std::forward_as_tuple(*std::get(arglists_its)...); }; - *out_it++ = std::move(std::apply(func, get_tuple(std::make_index_sequence{}))); + out_it = std::move(std::apply(func, get_tuple(std::make_index_sequence{}))); return; } else { const auto& arglist = std::get(arglists_tuple); diff --git a/src/shader_recompiler/ir/compute_value/compute.cpp b/src/shader_recompiler/ir/compute_value/compute.cpp index f2cb9007b..68bb48b87 100644 --- a/src/shader_recompiler/ir/compute_value/compute.cpp +++ b/src/shader_recompiler/ir/compute_value/compute.cpp @@ -43,9 +43,8 @@ static void DoInstructionOperation(Inst* inst, ImmValueList& inst_values, Cache& #include "shader_recompiler/ir/opcodes.inc" #undef OPCODE default: - break; + UNREACHABLE_MSG("Invalid opcode: {}", inst->GetOpcode()); } - UNREACHABLE_MSG("Invalid opcode: {}", inst->GetOpcode()); } static bool IsSelectInst(Inst* inst) { @@ -69,7 +68,7 @@ void Compute(const Value& value, ImmValueList& values, Cache& cache) { values.insert(ImmValue(resolved)); return; } - if (resolved.Type() != Type::Opaque) { + if (resolved.IsImmediate()) { return; } Inst* inst = resolved.InstRecursive(); @@ -83,8 +82,7 @@ void 
Compute(const Value& value, ImmValueList& values, Cache& cache) { for (size_t i = 0; i < inst->NumArgs(); ++i) { Compute(inst->Arg(i), inst_values, cache); } - } - if (IsSelectInst(inst)) { + } else if (IsSelectInst(inst)) { Compute(inst->Arg(1), inst_values, cache); Compute(inst->Arg(2), inst_values, cache); } else { diff --git a/src/shader_recompiler/ir/compute_value/compute.h b/src/shader_recompiler/ir/compute_value/compute.h index 57907c3c6..f2946590a 100644 --- a/src/shader_recompiler/ir/compute_value/compute.h +++ b/src/shader_recompiler/ir/compute_value/compute.h @@ -3,8 +3,8 @@ #pragma once +#include <unordered_map> #include <unordered_set> -#include <boost/container/flat_map.hpp> #include "shader_recompiler/ir/compute_value/imm_value.h" #include "shader_recompiler/ir/value.h" @@ -15,7 +15,7 @@ namespace Shader::IR::ComputeValue { using ImmValueList = std::unordered_set<ImmValue>; -using Cache = boost::container::flat_map<Inst*, ImmValueList>; +using Cache = std::unordered_map<Inst*, ImmValueList>; void Compute(const Value& value, ImmValueList& values, Cache& cache); diff --git a/src/shader_recompiler/ir/compute_value/do_float_operations.cpp b/src/shader_recompiler/ir/compute_value/do_float_operations.cpp index 9868c2333..88f756e20 100644 --- a/src/shader_recompiler/ir/compute_value/do_float_operations.cpp +++ b/src/shader_recompiler/ir/compute_value/do_float_operations.cpp @@ -8,40 +8,40 @@ namespace Shader::IR::ComputeValue { void DoFPAbs32(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Abs, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPAbs64(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Abs, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Add, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoFPAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Add, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoFPSub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Sub, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoFPFma32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, const ImmValueList& args2) { Common::CartesianInvoke(ImmValue::Fma, - std::insert_iterator(inst_values, inst_values.end()), args0, args1, + std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2); } void DoFPFma64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, const ImmValueList& args2) { Common::CartesianInvoke(ImmValue::Fma, - std::insert_iterator(inst_values, inst_values.end()), args0, args1, + std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2); } @@ -56,13 +56,13 @@ void DoFPMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmVa } return ImmValue::Max(a, b); }; - Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.end()), args0, args1, +
Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args_legacy); } void DoFPMax64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Max, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoFPMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, @@ -76,93 +76,93 @@ void DoFPMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmVa } return ImmValue::Min(a, b); }; - Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.end()), args0, args1, + Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args_legacy); } void DoFPMin64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Min, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoFPMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Mul, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoFPMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Mul, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoFPDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Div, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoFPDiv64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Div, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoFPNeg32(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Neg, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPNeg64(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Neg, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPRecip32(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Recip, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPRecip64(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Recip, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPRecipSqrt32(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Rsqrt, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPRecipSqrt64(ImmValueList& inst_values, const ImmValueList& args) { 
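// CartesianInvoke (common/cartesian_invoke.h) enumerates the Cartesian
// product of the candidate-value sets and writes the callback's result for
// each combination through the insert iterator; with an unordered_set
// destination the position hint held by std::insert_iterator has no effect
// on where elements land. A binary wrapper such as DoFPAdd32 expands to
// roughly (an illustrative sketch, not the actual template machinery):
//
//     for (const ImmValue& a : args0)
//         for (const ImmValue& b : args1)
//             inst_values.insert(ImmValue::Add(a, b));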
Common::CartesianInvoke(ImmValue::Rsqrt, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPSqrt(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Sqrt, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPSin(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Sin, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPExp2(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Exp2, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPLdexp(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& exponents) { Common::CartesianInvoke(ImmValue::Ldexp, - std::insert_iterator(inst_values, inst_values.end()), args, exponents); + std::insert_iterator(inst_values, inst_values.begin()), args, exponents); } void DoFPCos(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Cos, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPLog2(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Log2, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPSaturate32(ImmValueList& inst_values, const ImmValueList& args) { @@ -176,63 +176,63 @@ void DoFPSaturate64(ImmValueList& inst_values, const ImmValueList& args) { void DoFPClamp32(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins, const ImmValueList& maxs) { Common::CartesianInvoke(ImmValue::Clamp, - std::insert_iterator(inst_values, inst_values.end()), args, mins, maxs); + std::insert_iterator(inst_values, inst_values.begin()), args, mins, maxs); } void DoFPClamp64(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins, const ImmValueList& maxs) { Common::CartesianInvoke(ImmValue::Clamp, - std::insert_iterator(inst_values, inst_values.end()), args, mins, maxs); + std::insert_iterator(inst_values, inst_values.begin()), args, mins, maxs); } void DoFPRoundEven32(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Round, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPRoundEven64(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Round, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPFloor32(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Floor, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPFloor64(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Floor, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPCeil32(ImmValueList& inst_values, const ImmValueList& args) { 
Common::CartesianInvoke(ImmValue::Ceil, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPCeil64(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Ceil, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPTrunc32(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Trunc, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPTrunc64(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Trunc, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPFract32(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Fract, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPFract64(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Fract, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoFPFrexpSig32(ImmValueList& inst_values, const ImmValueList& args) { diff --git a/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp b/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp index bcc101bde..76204ad40 100644 --- a/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp +++ b/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp @@ -8,12 +8,12 @@ namespace Shader::IR::ComputeValue { void DoIAdd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Add, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoIAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Add, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoIAddCary32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { @@ -22,22 +22,22 @@ void DoIAddCary32(ImmValueList& inst_values, const ImmValueList& args0, const Im void DoISub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Sub, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoISub64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Sub, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoIMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Mul, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoIMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { 
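// The integer wrappers follow the same shape as the float ones above: one
// candidate set per IR operand in, the set of all reachable results out.
// They are driven from DoInstructionOperation in compute.cpp; a hand-rolled
// call would look roughly like this (sets and values illustrative):
//
//     ImmValueList lhs, rhs, out;
//     Compute(inst->Arg(0), lhs, cache);
//     Compute(inst->Arg(1), rhs, cache);
//     DoIMul32(out, lhs, rhs); // out = every product of an lhs/rhs pair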
Common::CartesianInvoke(ImmValue::Mul, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoSMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { @@ -50,103 +50,103 @@ void DoUMulExt(ImmValueList& inst_values, const ImmValueList& args0, const ImmVa void DoSDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Div, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoUDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Div, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoSMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Mod, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoUMod32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Mod, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoINeg32(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Neg, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoINeg64(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Neg, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoIAbs32(ImmValueList& inst_values, const ImmValueList& args) { Common::CartesianInvoke(ImmValue::Abs, - std::insert_iterator(inst_values, inst_values.end()), args); + std::insert_iterator(inst_values, inst_values.begin()), args); } void DoShiftLeftLogical32(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& shift) { Common::CartesianInvoke(ImmValue::LShift, - std::insert_iterator(inst_values, inst_values.end()), args, shift); + std::insert_iterator(inst_values, inst_values.begin()), args, shift); } void DoShiftLeftLogical64(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& shift) { Common::CartesianInvoke(ImmValue::LShift, - std::insert_iterator(inst_values, inst_values.end()), args, shift); + std::insert_iterator(inst_values, inst_values.begin()), args, shift); } void DoShiftRightLogical32(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& shift) { Common::CartesianInvoke(ImmValue::RShift, - std::insert_iterator(inst_values, inst_values.end()), args, shift); + std::insert_iterator(inst_values, inst_values.begin()), args, shift); } void DoShiftRightLogical64(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& shift) { Common::CartesianInvoke(ImmValue::RShift, - std::insert_iterator(inst_values, inst_values.end()), args, shift); + std::insert_iterator(inst_values, inst_values.begin()), args, shift); } void DoShiftRightArithmetic32(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& shift) { 
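// Note: the logical and arithmetic right-shift wrappers both name
// ImmValue::RShift; the sign behaviour is presumably selected by the
// signedness of the stored operand type rather than by a separate helper.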
Common::CartesianInvoke(ImmValue::RShift, - std::insert_iterator(inst_values, inst_values.end()), args, shift); + std::insert_iterator(inst_values, inst_values.begin()), args, shift); } void DoShiftRightArithmetic64(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& shift) { Common::CartesianInvoke(ImmValue::RShift, - std::insert_iterator(inst_values, inst_values.end()), args, shift); + std::insert_iterator(inst_values, inst_values.begin()), args, shift); } void DoBitwiseAnd32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::And, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoBitwiseAnd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::And, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoBitwiseOr32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Or, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoBitwiseOr64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Or, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoBitwiseXor32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Xor, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoBitFieldInsert(ImmValueList& inst_values, const ImmValueList& arg, @@ -179,7 +179,7 @@ void DoBitCount64(ImmValueList& inst_values, const ImmValueList& arg) { void DoBitwiseNot32(ImmValueList& inst_values, const ImmValueList& arg) { Common::CartesianInvoke(ImmValue::Not, - std::insert_iterator(inst_values, inst_values.end()), arg); + std::insert_iterator(inst_values, inst_values.begin()), arg); } void DoFindSMsb32(ImmValueList& inst_values, const ImmValueList& arg) { @@ -200,34 +200,34 @@ void DoFindILsb64(ImmValueList& inst_values, const ImmValueList& arg) { void DoSMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Min, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoUMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Min, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoSMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Max, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoUMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { Common::CartesianInvoke(ImmValue::Max, - std::insert_iterator(inst_values, inst_values.end()), args0, args1); + 
std::insert_iterator(inst_values, inst_values.begin()), args0, args1); } void DoSClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min, const ImmValueList& max) { Common::CartesianInvoke(ImmValue::Clamp, - std::insert_iterator(inst_values, inst_values.end()), value, min, max); + std::insert_iterator(inst_values, inst_values.begin()), value, min, max); } void DoUClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min, const ImmValueList& max) { Common::CartesianInvoke(ImmValue::Clamp, - std::insert_iterator(inst_values, inst_values.end()), value, min, max); + std::insert_iterator(inst_values, inst_values.begin()), value, min, max); } } // namespace Shader::IR::ComputeValue \ No newline at end of file diff --git a/src/shader_recompiler/ir/compute_value/do_logical_operations.cpp b/src/shader_recompiler/ir/compute_value/do_logical_operations.cpp index 8b494aafa..38a44fa9a 100644 --- a/src/shader_recompiler/ir/compute_value/do_logical_operations.cpp +++ b/src/shader_recompiler/ir/compute_value/do_logical_operations.cpp @@ -8,22 +8,22 @@ namespace Shader::IR::ComputeValue { void DoLogicalOr(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2) { Common::CartesianInvoke(ImmValue::Or, - std::insert_iterator(inst_values, inst_values.end()), arg1, arg2); + std::insert_iterator(inst_values, inst_values.begin()), arg1, arg2); } void DoLogicalAnd(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2) { Common::CartesianInvoke(ImmValue::And, - std::insert_iterator(inst_values, inst_values.end()), arg1, arg2); + std::insert_iterator(inst_values, inst_values.begin()), arg1, arg2); } void DoLogicalXor(ImmValueList& inst_values, const ImmValueList& arg1, const ImmValueList& arg2) { Common::CartesianInvoke(ImmValue::Xor, - std::insert_iterator(inst_values, inst_values.end()), arg1, arg2); + std::insert_iterator(inst_values, inst_values.begin()), arg1, arg2); } void DoLogicalNot(ImmValueList& inst_values, const ImmValueList& arg1) { Common::CartesianInvoke(ImmValue::Not, - std::insert_iterator(inst_values, inst_values.end()), arg1); + std::insert_iterator(inst_values, inst_values.begin()), arg1); } } // namespace Shader::IR::ComputeValue \ No newline at end of file diff --git a/src/shader_recompiler/ir/compute_value/imm_value.cpp b/src/shader_recompiler/ir/compute_value/imm_value.cpp index d92aed43c..c9ebf1519 100644 --- a/src/shader_recompiler/ir/compute_value/imm_value.cpp +++ b/src/shader_recompiler/ir/compute_value/imm_value.cpp @@ -7,6 +7,7 @@ namespace Shader::IR::ComputeValue { ImmValue::ImmValue(const IR::Value& value) noexcept { + ASSERT(value.IsImmediate()); switch (value.Type()) { case Type::U1: imm_values[0].imm_u1 = value.U1(); @@ -1223,6 +1224,9 @@ bool ImmValue::IsNan(const ImmValue& in) noexcept { } bool ImmValue::IsSupportedValue(const IR::Value& value) noexcept { + if (!value.IsImmediate()) { + return false; + } switch (value.Type()) { case IR::Type::U1: case IR::Type::U8: diff --git a/src/shader_recompiler/ir/num_executions.cpp b/src/shader_recompiler/ir/num_executions.cpp index 4c79135d7..0a63b9d77 100644 --- a/src/shader_recompiler/ir/num_executions.cpp +++ b/src/shader_recompiler/ir/num_executions.cpp @@ -50,8 +50,11 @@ u64 GetNumExecutions(const Inst* inst) { std::insert_iterator(distances, distances.end()), cond_arg0, cond_arg1); } - num_executions *= - std::max(1, *std::max_element(distances.begin(), distances.end())); + if (!distances.empty()) { + // We assume that the 
iterator changes by 1 each loop iteration. + num_executions *= + std::max(1, *std::max_element(distances.begin(), distances.end())) + 1; + } } cond_data = cond_data->parent; } From 057df5d3d19d1ec6231d52d9d8665a8b06292fa3 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 27 Mar 2025 01:11:00 +0100 Subject: [PATCH 30/49] clang-format --- .../spirv/emit_spirv_context_get_set.cpp | 3 +-- .../ir/compute_value/do_float_operations.cpp | 17 ++++++++++------- .../ir/compute_value/do_integer_operations.cpp | 6 ++++-- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 541222163..721148e85 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -166,8 +166,7 @@ Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) { const auto& srt_flatbuf = ctx.buffers.back(); ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo); const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32]; - const Id ptr{ - ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.Def(inst->Arg(1)))}; + const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.Def(inst->Arg(1)))}; return ctx.OpLoad(ctx.U32[1], ptr); } diff --git a/src/shader_recompiler/ir/compute_value/do_float_operations.cpp b/src/shader_recompiler/ir/compute_value/do_float_operations.cpp index 88f756e20..dd4175eac 100644 --- a/src/shader_recompiler/ir/compute_value/do_float_operations.cpp +++ b/src/shader_recompiler/ir/compute_value/do_float_operations.cpp @@ -56,8 +56,8 @@ void DoFPMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmVa } return ImmValue::Max(a, b); }; - Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), args0, args1, - args_legacy); + Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), args0, + args1, args_legacy); } void DoFPMax64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { @@ -76,8 +76,8 @@ void DoFPMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmVa } return ImmValue::Min(a, b); }; - Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), args0, args1, - args_legacy); + Common::CartesianInvoke(op, std::insert_iterator(inst_values, inst_values.begin()), args0, + args1, args_legacy); } void DoFPMin64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) { @@ -152,7 +152,8 @@ void DoFPExp2(ImmValueList& inst_values, const ImmValueList& args) { void DoFPLdexp(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& exponents) { Common::CartesianInvoke(ImmValue::Ldexp, - std::insert_iterator(inst_values, inst_values.begin()), args, exponents); + std::insert_iterator(inst_values, inst_values.begin()), args, + exponents); } void DoFPCos(ImmValueList& inst_values, const ImmValueList& args) { @@ -176,13 +177,15 @@ void DoFPSaturate64(ImmValueList& inst_values, const ImmValueList& args) { void DoFPClamp32(ImmValueList& inst_values, const ImmValueList& args, const ImmValueList& mins, const ImmValueList& maxs) { Common::CartesianInvoke(ImmValue::Clamp, - std::insert_iterator(inst_values, inst_values.begin()), args, mins, maxs); + std::insert_iterator(inst_values, inst_values.begin()), args, mins, + maxs); } void DoFPClamp64(ImmValueList& 
inst_values, const ImmValueList& args, const ImmValueList& mins, const ImmValueList& maxs) { Common::CartesianInvoke(ImmValue::Clamp, - std::insert_iterator(inst_values, inst_values.begin()), args, mins, maxs); + std::insert_iterator(inst_values, inst_values.begin()), args, mins, + maxs); } void DoFPRoundEven32(ImmValueList& inst_values, const ImmValueList& args) { diff --git a/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp b/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp index 76204ad40..4e5f29e73 100644 --- a/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp +++ b/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp @@ -221,13 +221,15 @@ void DoUMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmVal void DoSClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min, const ImmValueList& max) { Common::CartesianInvoke(ImmValue::Clamp, - std::insert_iterator(inst_values, inst_values.begin()), value, min, max); + std::insert_iterator(inst_values, inst_values.begin()), value, min, + max); } void DoUClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min, const ImmValueList& max) { Common::CartesianInvoke(ImmValue::Clamp, - std::insert_iterator(inst_values, inst_values.begin()), value, min, max); + std::insert_iterator(inst_values, inst_values.begin()), value, min, + max); } } // namespace Shader::IR::ComputeValue \ No newline at end of file From fff2383c5e3111d73baefdd5e74335df503c52a8 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 27 Mar 2025 11:54:58 +0100 Subject: [PATCH 31/49] Flattening pass adjustments --- .../backend/spirv/emit_spirv_context_get_set.cpp | 1 + .../ir/passes/flatten_extended_userdata_pass.cpp | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 721148e85..5b4f3c3c5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -164,6 +164,7 @@ using BufferAlias = EmitContext::BufferAlias; Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) { const auto& srt_flatbuf = ctx.buffers.back(); + ASSERT_MSG(inst->Flags() == 1, "ReadConst was not processed by the flattening pass"); ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo); const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32]; const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.Def(inst->Arg(1)))}; diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index 7aa8283eb..10e651e1c 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -159,14 +159,17 @@ static void GenerateSrtReadConsts(IR::Program& program, PassInfo& pass_info, Poo ++IR::Block::InstructionList::s_iterator_to(*inst)); ir.StoreFlatbuf(IR::U32(inst), save_offset); } + data.original_inst->SetFlags(1); + IR::IREmitter ir(*data.original_inst); + data.original_inst->SetArg(0, ir.Imm32(0)); if (data.count_dw > 1) { IR::U32 counter = WrapInstWithCounter(data.original_inst, data.offset_dw, original_first_block); data.original_inst->SetArg(1, counter); } else { - IR::IREmitter ir(*data.original_inst); 
data.original_inst->SetArg(1, ir.Imm32(data.offset_dw)); } + } DeadCodeEliminationPass(sub_program); IR::DumpProgram(sub_program, sub_program.info, "srt"); From 7545ae33b1803ade2770febab57c9b42f6ab5df5 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 27 Mar 2025 19:10:21 +0100 Subject: [PATCH 32/49] x64 backend base --- CMakeLists.txt | 6 ++++++ .../backend/asm_x64/emit_x64.cpp | 12 ++++++++++++ .../backend/asm_x64/emit_x64.h | 15 +++++++++++++++ .../passes/flatten_extended_userdata_pass.cpp | 17 +++++++++++++---- 4 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64.cpp create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e56ff15eb..f7617d050 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -897,6 +897,12 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/value.h ) +if (ARCHITECTURE STREQUAL "x86_64") + set(SHADER_RECOMPILER ${SHADER_RECOMPILER} + src/shader_recompiler/backend/asm_x64/emit_x64.cpp + src/shader_recompiler/backend/asm_x64/emit_x64.h) +endif() + set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/amdgpu/liverpool.h src/video_core/amdgpu/pixel_format.cpp diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp new file mode 100644 index 000000000..d1a95bd0a --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/emit_x64.h" + +namespace Shader::Backend::X64 { + +void EmitX64(const IR::Program& program, Xbyak::CodeGenerator& c) { + +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64.h b/src/shader_recompiler/backend/asm_x64/emit_x64.h new file mode 100644 index 000000000..36197f9fb --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64.h @@ -0,0 +1,15 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "shader_recompiler/ir/program.h" + +namespace Shader::Backend::X64 { + +void EmitX64(const IR::Program& program, Xbyak::CodeGenerator& c); + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index 10e651e1c..a2b9a0056 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -4,13 +4,15 @@ #include #include -#include -#include +#include "common/arch.h" #include "common/config.h" #include "common/io_file.h" #include "common/logging/log.h" #include "common/path_util.h" #include "shader_recompiler/info.h" +#ifdef ARCH_X86_64 +#include "shader_recompiler/backend/asm_x64/emit_x64.h" +#endif #include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/num_executions.h" @@ -125,7 +127,7 @@ static IR::U32 WrapInstWithCounter(IR::Inst* inst, u32 inital_value, IR::Block* return IR::U32(phi); } -static void GenerateSrtReadConsts(IR::Program& program, PassInfo& pass_info, Pools& pools) { +static IR::Program 
GenerateSrtReadConstsSubProgram(IR::Program& program, PassInfo& pass_info, Pools& pools) {
     IR::SubProgram sub_gen(&program, pools);
     for (auto& [inst, sub_inst] : pass_info.all_readconsts) {
         sub_inst = sub_gen.AddInst(inst);
@@ -173,9 +175,11 @@
     }
     DeadCodeEliminationPass(sub_program);
     IR::DumpProgram(sub_program, sub_program.info, "srt");
+    return sub_program;
 }
 
 static void GenerateSrtProgram(IR::Program& program, PassInfo& pass_info, Pools& pools) {
+#ifdef ARCH_X86_64
     Xbyak::CodeGenerator& c = g_srt_codegen;
 
     Shader::Info& info = program.info;
@@ -210,7 +214,7 @@
     ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);
 
     if (!pass_info.all_readconsts.empty()) {
-        GenerateSrtReadConsts(program, pass_info, pools);
+        GenerateSrtReadConstsSubProgram(program, pass_info, pools);
     }
 
     info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;
@@ -222,6 +226,11 @@
         size_t codesize = c.getCurr() - reinterpret_cast<const u8*>(info.srt_info.walker_func);
         DumpSrtProgram(info, reinterpret_cast<const u8*>(info.srt_info.walker_func), codesize);
     }
+#else
+    if (!program.info.srt_info.srt_reservations.empty() || !pass_info.all_readconsts.empty()) {
+        UNREACHABLE_MSG("SRT program generation only supported on x86_64");
+    }
+#endif
 }
 
 }; // namespace

From 4224a95583b8bf5db7f01d1b73040270ae967f79 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Tue, 1 Apr 2025 10:12:23 +0200
Subject: [PATCH 33/49] x64 backend core and context

---
 CMakeLists.txt                                 |   8 +-
 .../backend/asm_x64/emit_x64.cpp               | 153 +++++++-
 .../backend/asm_x64/emit_x64_condition.cpp     |   6 +
 .../backend/asm_x64/emit_x64_condition.h       |   8 +
 .../backend/asm_x64/x64_emit_context.cpp       | 353 ++++++++++++++++++
 .../backend/asm_x64/x64_emit_context.h         | 113 ++++++
 .../backend/asm_x64/x64_utils.cpp              | 285 ++++++++++++++
 .../backend/asm_x64/x64_utils.h                |  23 ++
 8 files changed, 947 insertions(+), 2 deletions(-)
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_condition.h
 create mode 100644 src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/x64_emit_context.h
 create mode 100644 src/shader_recompiler/backend/asm_x64/x64_utils.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/x64_utils.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f7617d050..a550a7a88 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -899,8 +899,14 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
 
 if (ARCHITECTURE STREQUAL "x86_64")
     set(SHADER_RECOMPILER ${SHADER_RECOMPILER}
+        src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp
+        src/shader_recompiler/backend/asm_x64/emit_x64_condition.h
         src/shader_recompiler/backend/asm_x64/emit_x64.cpp
-        src/shader_recompiler/backend/asm_x64/emit_x64.h)
+        src/shader_recompiler/backend/asm_x64/emit_x64.h
+        src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
+        src/shader_recompiler/backend/asm_x64/x64_emit_context.h
+        src/shader_recompiler/backend/asm_x64/x64_utils.cpp
+        src/shader_recompiler/backend/asm_x64/x64_utils.h)
 endif()
 
 set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
index d1a95bd0a..d7d284cb8 100644
--- 
a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp @@ -1,12 +1,163 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "common/func_traits.h" #include "shader_recompiler/backend/asm_x64/emit_x64.h" +#include "shader_recompiler/backend/asm_x64/emit_x64_condition.h" +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" +#include "shader_recompiler/backend/asm_x64/x64_utils.h" namespace Shader::Backend::X64 { +using namespace Xbyak; +using namespace Xbyak::util; + +static void TestCondition(EmitContext& ctx, const IR::Inst* ref) { + IR::Value cond = ref->Arg(0); + Operand& op = ctx.Def(cond)[0]; + Reg8 tmp = op.isREG() ? op.getReg().cvt8() : ctx.TempGPReg(false).cvt8(); + if (!op.isREG()) { + ctx.Code().mov(tmp, op); + } + ctx.Code().test(tmp, tmp); +} + +template +ArgType Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v) { + return ctx.Def(arg); + } else if constexpr (std::is_same_v) { + return arg; + } else if constexpr (std::is_same_v) { + return arg.U32(); + } else if constexpr (std::is_same_v) { + return arg.U64(); + } else if constexpr (std::is_same_v) { + return arg.U1(); + } else if constexpr (std::is_same_v) { + return arg.Attribute(); + } else if constexpr (std::is_same_v) { + return arg.ScalarReg(); + } else if constexpr (std::is_same_v) { + return arg.VectorReg(); + } else if constexpr (std::is_same_v) { + return arg.StringLiteral(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); + } + UNREACHABLE(); +} + +template +static void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { + using Traits = Common::FuncTraits; + if constexpr (has_dest) { + if constexpr (is_first_arg_inst) { + func(ctx, inst, ctx.Def(inst), + Arg>(ctx, inst->Arg(I))...); + } else { + func(ctx, ctx.Def(inst), + Arg>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + func(ctx, inst, Arg>(ctx, inst->Arg(I))...); + } else { + func(ctx, Arg>(ctx, inst->Arg(I))...); + } + } +} + +template +static void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = Common::FuncTraits; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v; + static constexpr size_t num_inst_args = Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1); + if constexpr (num_inst_args > 0 && has_dest) { + Invoke(ctx, inst, + std::make_index_sequence{}); + } else { + Invoke(ctx, inst, + std::make_index_sequence{}); + } + } +} + +static void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) 
\
+    case IR::Opcode::name: \
+        return Invoke<&Emit##name, IR::Type::result_type != IR::Type::Void>(ctx, inst);
+#include "shader_recompiler/ir/opcodes.inc"
+#undef OPCODE
+    }
+    UNREACHABLE_MSG("Invalid opcode {}", inst->GetOpcode());
+}
+
+static void Traverse(EmitContext& ctx, const IR::Program& program) {
+    CodeGenerator& c = ctx.Code();
+    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+        ctx.ResetTempRegs();
+        switch (node.type) {
+        case IR::AbstractSyntaxNode::Type::Block: {
+            IR::Block* block = node.data.block;
+            c.L(ctx.BlockLabel(block));
+            for (IR::Inst& inst : *block) {
+                EmitInst(ctx, &inst);
+            }
+            const auto& phi_assignments = ctx.PhiAssignments(block);
+            if (phi_assignments) {
+                for (const auto& [phi, value] : phi_assignments->get()) {
+                    MovValue(ctx, ctx.Def(phi), value);
+                }
+            }
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::If: {
+            IR::Inst* ref = node.data.if_node.cond.InstRecursive();
+            Label& merge = ctx.BlockLabel(node.data.if_node.merge);
+            TestCondition(ctx, ref);
+            c.jz(merge);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Repeat: {
+            IR::Inst* ref = node.data.repeat.cond.InstRecursive();
+            Label& loop_header = ctx.BlockLabel(node.data.repeat.loop_header);
+            TestCondition(ctx, ref);
+            c.jnz(loop_header);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Break: {
+            IR::Inst* ref = node.data.break_node.cond.InstRecursive();
+            Label& merge = ctx.BlockLabel(node.data.break_node.merge);
+            TestCondition(ctx, ref);
+            c.jz(merge);
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Return: {
+            c.jmp(ctx.EndLabel());
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Unreachable: {
+            c.int3();
+            break;
+        }
+        case IR::AbstractSyntaxNode::Type::Loop:
+        case IR::AbstractSyntaxNode::Type::EndIf:
+            break;
+        }
+    }
+}
+
 void EmitX64(const IR::Program& program, Xbyak::CodeGenerator& c) {
-
+    EmitContext context(program, c);
+    context.Prologue();
+    Traverse(context, program);
+    context.Code().L(context.EndLabel());
+    context.Epilogue();
 }
 
 } // namespace Shader::Backend::X64
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp
new file mode 100644
index 000000000..046454b6f
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp
@@ -0,0 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/asm_x64/emit_x64_condition.h"
+
+namespace Shader::Backend::X64 {}
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_condition.h b/src/shader_recompiler/backend/asm_x64/emit_x64_condition.h
new file mode 100644
index 000000000..16d6093ea
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_condition.h
@@ -0,0 +1,8 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+
+namespace Shader::Backend::X64 {}
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
new file mode 100644
index 000000000..1c5d5c103
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
@@ -0,0 +1,353 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+#include "shader_recompiler/backend/asm_x64/x64_utils.h"
"shader_recompiler/backend/asm_x64/x64_utils.h" + +using namespace Xbyak; +using namespace Xbyak::util; + +namespace Shader::Backend::X64 { + +EmitContext::EmitContext(const IR::Program& program_, Xbyak::CodeGenerator& code_) + : program(program_), code(code_) { + for (IR::Block* block : program.blocks) { + block_labels[block] = {}; + } + AllocateRegisters(); +} + +Reg64& EmitContext::TempGPReg(bool reserve) { + ASSERT(temp_gp_reg_index < temp_gp_regs.size()); + u64 idx = temp_gp_reg_index; + if (reserve) { + temp_gp_reg_index++; + } + Reg64& reg = temp_gp_regs[idx]; + if (idx > num_scratch_gp_regs && + std::ranges::find(preserved_regs, reg) == preserved_regs.end()) { + preserved_regs.push_back(reg); + code.push(reg); + } + return reg; +} + +Xmm& EmitContext::TempXmmReg(bool reserve) { + ASSERT(temp_xmm_reg_index < temp_xmm_regs.size()); + u64 idx = temp_xmm_reg_index; + if (reserve) { + temp_xmm_reg_index++; + } + Xmm& reg = temp_xmm_regs[idx]; + if (idx > num_scratch_xmm_regs && + std::ranges::find(preserved_regs, reg) == preserved_regs.end()) { + preserved_regs.push_back(reg); + code.sub(rsp, 16); + code.movdqu(ptr[rsp], reg); + } + return reg; +} + +Operands EmitContext::Def(IR::Inst* inst) { + return inst_to_operands.at(inst); +} + +Operands EmitContext::Def(const IR::Value& value) { + if (!value.IsImmediate()) { + return Def(value.InstRecursive()); + } + Operands operands; + Reg64& tmp = TempGPReg(false); + switch (value.Type()) { + case IR::Type::U1: + operands.push_back(TempGPReg().cvt8()); + code.mov(operands.back(), value.U1()); + break; + case IR::Type::U8: + operands.push_back(TempGPReg().cvt8()); + code.mov(operands.back(), value.U8()); + break; + case IR::Type::U16: + operands.push_back(TempGPReg().cvt16()); + code.mov(operands.back(), value.U16()); + break; + case IR::Type::U32: + operands.push_back(TempGPReg().cvt32()); + code.mov(operands.back(), value.U32()); + break; + case IR::Type::F32: { + code.mov(tmp.cvt32(), std::bit_cast(value.F32())); + Xmm& xmm32 = TempXmmReg(); + code.movd(xmm32, tmp.cvt32()); + operands.push_back(xmm32); + break; + } + case IR::Type::U64: + operands.push_back(TempGPReg()); + code.mov(operands.back(), value.U64()); + break; + case IR::Type::F64: { + code.mov(tmp, std::bit_cast(value.F64())); + Xmm& xmm64 = TempXmmReg(); + code.movq(xmm64, tmp); + operands.push_back(xmm64); + break; + } + case IR::Type::ScalarReg: + operands.push_back(TempGPReg().cvt32()); + code.mov(operands.back(), std::bit_cast(value.ScalarReg())); + break; + case IR::Type::VectorReg: + operands.push_back(TempXmmReg().cvt32()); + code.mov(operands.back(), std::bit_cast(value.VectorReg())); + break; + case IR::Type::Attribute: + operands.push_back(TempGPReg()); + code.mov(operands.back(), std::bit_cast(value.Attribute())); + break; + case IR::Type::Patch: + operands.push_back(TempGPReg()); + code.mov(operands.back(), std::bit_cast(value.Patch())); + break; + default: + UNREACHABLE_MSG("Unsupported value type: %s", IR::NameOf(value.Type())); + break; + } + return operands; +} + +std::optional> +EmitContext::PhiAssignments(IR::Block* block) const { + auto it = phi_assignments.find(block); + if (it != phi_assignments.end()) { + return std::cref(it->second); + } + return std::nullopt; +} + +void EmitContext::ResetTempRegs() { + temp_gp_reg_index = 0; + temp_xmm_reg_index = 0; +} + +void EmitContext::Prologue() { + if (inst_stack_space > 0) { + code.sub(rsp, inst_stack_space); + code.mov(r11, rsp); + } +} + +void EmitContext::Epilogue() { + for (auto it = preserved_regs.rbegin(); 
+        Reg& reg = *it;
+        if (reg.isXMM()) {
+            code.movdqu(reg.cvt128(), ptr[rsp]);
+            code.add(rsp, 16);
+        } else {
+            code.pop(reg);
+        }
+    }
+    preserved_regs.clear();
+    if (inst_stack_space > 0) {
+        code.add(rsp, inst_stack_space);
+    }
+}
+
+void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval,
+                            ActiveIntervalList& active_intervals) {
+    const auto get_operand = [&](IR::Inst* inst) -> Operand {
+        size_t current_sp = inst_stack_space;
+        if (ctx.free_stack_slots.empty()) {
+            inst_stack_space += 8;
+        } else {
+            current_sp = ctx.free_stack_slots.back();
+            ctx.free_stack_slots.pop_back();
+        }
+        switch (GetRegBytesOfType(inst->Type())) {
+        case 1:
+            return byte[r11 + current_sp];
+        case 2:
+            return word[r11 + current_sp];
+        case 4:
+            return dword[r11 + current_sp];
+        case 8:
+            return qword[r11 + current_sp];
+        default:
+            UNREACHABLE_MSG("Unsupported register size: {}", GetRegBytesOfType(inst->Type()));
+            return {};
+        }
+    };
+    auto spill_candidate = std::max_element(
+        active_intervals.begin(), active_intervals.end(),
+        [](const ActiveInstInterval& a, const ActiveInstInterval& b) { return a.end < b.end; });
+    if (spill_candidate == active_intervals.end() || spill_candidate->end <= interval.start) {
+        inst_to_operands[interval.inst][interval.component] = get_operand(interval.inst);
+        ctx.active_spill_intervals.push_back(interval);
+    } else {
+        Operands& operands = inst_to_operands[spill_candidate->inst];
+        Reg reg = operands[spill_candidate->component].getReg();
+        inst_to_operands[interval.inst][interval.component] =
+            reg.isXMM() ? reg : ResizeRegToType(reg, interval.inst->Type());
+        operands[spill_candidate->component] = get_operand(spill_candidate->inst);
+        ctx.active_spill_intervals.push_back(*spill_candidate);
+        *spill_candidate = interval;
+    }
+}
+
+void EmitContext::AdjustInstInterval(InstInterval& interval, const FlatInstList& insts) {
+    IR::Inst* inst = interval.inst;
+    size_t dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), inst));
+    interval.start = dist;
+    interval.end = dist;
+    for (const auto& use : inst->Uses()) {
+        if (use.user->GetOpcode() == IR::Opcode::Phi) {
+            // We assign the value at the end of the phi block
+            IR::Inst& last_inst = use.user->PhiBlock(use.operand)->back();
+            dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), &last_inst));
+            interval.start = std::min(interval.start, dist);
+            interval.end = std::max(interval.end, dist);
+        } else {
+            dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), use.user));
+            interval.end = std::max(interval.end, dist);
+        }
+    }
+    if (inst->GetOpcode() == IR::Opcode::Phi) {
+        for (size_t i = 0; i < inst->NumArgs(); i++) {
+            IR::Block* block = inst->PhiBlock(i);
+            dist =
+                std::distance(insts.begin(), std::find(insts.begin(), insts.end(), &block->back()));
+            interval.start = std::min(interval.start, dist);
+            interval.end = std::max(interval.end, dist);
+            phi_assignments[block].emplace_back(inst, inst->Arg(i));
+        }
+    }
+}
+
+// Register utilization:
+// Instruction registers:
+// General purpose registers: rcx, rdx, rsi, r8, r9, r10
+// XMM registers: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
+//
+// Value / temporary registers:
+// General purpose registers: rax (scratch), rbx, r12, r13, r14, r15
+// XMM registers: xmm7 (scratch), xmm7 (scratch), xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14,
+// xmm15
+//
+// r11: Stack pointer for spilled instructions
+// rdi: User data pointer
+// rsp: Stack pointer
+// 
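+//
+// Register assignment is a linear scan over the live intervals computed by
+// AdjustInstInterval: intervals are processed in order of increasing start,
+// expired intervals hand their register or stack slot back to the free
+// lists, and when no register is free SpillInst either steals the register
+// of the active interval that ends last or parks the new value in the
+// r11-relative spill area. Roughly (a sketch, not the literal loop below):
+//
+//     for (interval : intervals sorted by start) {
+//         expire(active_lists, interval.start);     // recycle regs / slots
+//         if (a register is free)
+//             assign(interval, free_list.pop_back());
+//         else
+//             SpillInst(ctx, interval, active_list); // evict furthest end
+//     }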
+// If instruction registers are never used, they will be used as temporary registers
+void EmitContext::AllocateRegisters() {
+    const std::array initial_gp_inst_regs = {rcx, rdx, rsi, r8, r9, r10};
+    const std::array initial_xmm_inst_regs = {xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6};
+    const std::array initial_gp_temp_regs = {rax, rbx, r12, r13, r14, r15};
+    const std::array initial_xmm_temp_regs = {xmm7, xmm7, xmm8, xmm9, xmm10,
+                                              xmm11, xmm12, xmm13, xmm14, xmm15};
+
+    boost::container::small_vector intervals;
+    FlatInstList insts;
+    // We copy insts to the flat list for faster iteration
+    for (IR::Block* block : program.blocks) {
+        insts.reserve(insts.size() + block->size());
+        for (IR::Inst& inst : *block) {
+            insts.push_back(&inst);
+        }
+    }
+    for (IR::Inst* inst : insts) {
+        if (inst->GetOpcode() == IR::Opcode::ConditionRef || inst->Type() == IR::Type::Void) {
+            continue;
+        }
+        intervals.emplace_back(inst, 0, 0);
+        AdjustInstInterval(intervals.back(), insts);
+    }
+    std::sort(intervals.begin(), intervals.end(),
+              [](const InstInterval& a, const InstInterval& b) { return a.start < b.start; });
+    RegAllocContext ctx;
+    ctx.free_gp_regs.insert(ctx.free_gp_regs.end(), initial_gp_temp_regs.begin(),
+                            initial_gp_temp_regs.end());
+    ctx.free_xmm_regs.insert(ctx.free_xmm_regs.end(), initial_xmm_temp_regs.begin(),
+                             initial_xmm_temp_regs.end());
+    boost::container::static_vector unused_gp_inst_regs;
+    boost::container::static_vector unused_xmm_inst_regs;
+    unused_gp_inst_regs.insert(unused_gp_inst_regs.end(), ctx.free_gp_regs.begin(),
+                               ctx.free_gp_regs.end());
+    unused_xmm_inst_regs.insert(unused_xmm_inst_regs.end(), ctx.free_xmm_regs.begin(),
+                                ctx.free_xmm_regs.end());
+    for (const InstInterval& interval : intervals) {
+        // Free old interval resources
+        for (auto it = ctx.active_gp_intervals.begin(); it != ctx.active_gp_intervals.end();) {
+            if (it->end <= interval.start) {
+                Reg64 reg = inst_to_operands[it->inst][it->component].getReg().cvt64();
+                ctx.free_gp_regs.push_back(reg);
+                it = ctx.active_gp_intervals.erase(it);
+            } else {
+                ++it;
+            }
+        }
+        for (auto it = ctx.active_xmm_intervals.begin(); it != ctx.active_xmm_intervals.end();) {
+            if (it->end <= interval.start) {
+                Xmm reg = inst_to_operands[it->inst][it->component].getReg().cvt128();
+                ctx.free_xmm_regs.push_back(reg);
+                it = ctx.active_xmm_intervals.erase(it);
+            } else {
+                ++it;
+            }
+        }
+        for (auto it = ctx.active_spill_intervals.begin();
+             it != ctx.active_spill_intervals.end();) {
+            if (it->end <= interval.start) {
+                const Address& addr = inst_to_operands[it->inst][it->component].getAddress();
+                ctx.free_stack_slots.push_back(addr.getDisp());
+                it = ctx.active_spill_intervals.erase(it);
+            } else {
+                ++it;
+            }
+        }
+        u8 num_components = GetNumComponentsOfType(interval.inst->Type());
+        bool is_floating = IsFloatingType(interval.inst->Type());
+        if (is_floating) {
+            for (size_t i = 0; i < num_components; ++i) {
+                ActiveInstInterval active(interval, i);
+                if (!ctx.free_xmm_regs.empty()) {
+                    Xmm reg = ctx.free_xmm_regs.back();
+                    ctx.free_xmm_regs.pop_back();
+                    inst_to_operands[active.inst][active.component] = reg;
+                    unused_xmm_inst_regs.erase(
+                        std::remove(unused_xmm_inst_regs.begin(), unused_xmm_inst_regs.end(), reg),
+                        unused_xmm_inst_regs.end());
+                    ctx.active_xmm_intervals.push_back(active);
+                } else {
+                    SpillInst(ctx, active, ctx.active_xmm_intervals);
+                }
+            }
+        } else {
+            for (size_t i = 0; i < num_components; ++i) {
+                ActiveInstInterval active(interval, i);
+                if (!ctx.free_gp_regs.empty()) {
+                    Reg64 reg =
ctx.free_gp_regs.back(); + ctx.free_gp_regs.pop_back(); + inst_to_operands[active.inst][active.component] = + ResizeRegToType(reg, active.inst->Type()); + unused_gp_inst_regs.erase( + std::remove(unused_gp_inst_regs.begin(), unused_gp_inst_regs.end(), reg), + unused_gp_inst_regs.end()); + ctx.active_gp_intervals.push_back(active); + } else { + SpillInst(ctx, active, ctx.active_gp_intervals); + } + } + } + } + temp_gp_regs.insert(temp_gp_regs.end(), unused_gp_inst_regs.begin(), unused_gp_inst_regs.end()); + temp_xmm_regs.insert(temp_xmm_regs.end(), unused_xmm_inst_regs.begin(), + unused_xmm_inst_regs.end()); + num_scratch_gp_regs = unused_gp_inst_regs.size() + 1; // rax is scratch + num_scratch_xmm_regs = unused_xmm_inst_regs.size() + 1; // xmm7 is scratch + temp_gp_regs.insert(temp_gp_regs.end(), initial_gp_inst_regs.begin(), + initial_gp_inst_regs.end()); + temp_xmm_regs.insert(temp_xmm_regs.end(), initial_xmm_inst_regs.begin(), + initial_xmm_inst_regs.end()); +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h new file mode 100644 index 000000000..59e0f2822 --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h @@ -0,0 +1,113 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include "shader_recompiler/ir/program.h" + +namespace Shader::Backend::X64 { + +using Operands = boost::container::static_vector; + +class EmitContext { +public: + static constexpr size_t NumGPRegs = 16; + static constexpr size_t NumXmmRegs = 16; + + using PhiAssignmentList = boost::container::small_vector, 4>; + + EmitContext(const IR::Program& program_, Xbyak::CodeGenerator& code_); + + [[nodiscard]] Xbyak::CodeGenerator& Code() const { + return code; + } + + [[nodiscard]] const IR::Program& Program() const { + return program; + } + + [[nodiscard]] Xbyak::Label& EndLabel() { + return end_label; + } + + [[nodiscard]] Xbyak::Label& BlockLabel(IR::Block* block) { + return block_labels.at(block); + } + + [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true); + [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true); + + [[nodiscard]] Operands Def(IR::Inst* inst); + [[nodiscard]] Operands Def(const IR::Value& value); + [[nodiscard]] std::optional> + PhiAssignments(IR::Block* block) const; + + void ResetTempRegs(); + + void Prologue(); + void Epilogue(); + +private: + struct InstInterval { + IR::Inst* inst; + size_t start; + size_t end; + }; + + struct ActiveInstInterval : InstInterval { + size_t component; + + ActiveInstInterval(const InstInterval& interval, size_t component_) + : InstInterval(interval), component(component_) {} + }; + using ActiveIntervalList = boost::container::small_vector; + + struct RegAllocContext { + boost::container::static_vector free_gp_regs; + boost::container::static_vector free_xmm_regs; + boost::container::small_vector free_stack_slots; + ActiveIntervalList active_gp_intervals; + ActiveIntervalList active_xmm_intervals; + ActiveIntervalList active_spill_intervals; + }; + + using FlatInstList = boost::container::small_vector; + + const IR::Program& program; + Xbyak::CodeGenerator& code; + + // Map of blocks to their phi assignments + boost::container::small_flat_map phi_assignments; + + // Map of instructions to their operands + boost::container::small_flat_map inst_to_operands; + + // Space 
used for spilled instructions + size_t inst_stack_space = 0; + + // Temporary register allocation + boost::container::static_vector temp_gp_regs; + boost::container::static_vector temp_xmm_regs; + size_t temp_gp_reg_index = 0; + size_t temp_xmm_reg_index = 0; + size_t num_scratch_gp_regs = 0; + size_t num_scratch_xmm_regs = 0; + + // Preseved registers + boost::container::static_vector preserved_regs; + + // Labels + boost::container::small_flat_map block_labels; + Xbyak::Label end_label; + + void SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval, + ActiveIntervalList& active_intervals); + void AdjustInstInterval(InstInterval& interval, const FlatInstList& insts); + void AllocateRegisters(); +}; + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp new file mode 100644 index 000000000..90375b9d4 --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp @@ -0,0 +1,285 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/x64_utils.h" + +using namespace Xbyak; +using namespace Xbyak::util; + +namespace Shader::Backend::X64 { + +bool IsFloatingType(IR::Type type) { + // We store F16 on general purpose registers since we don't do + // arithmetic on them + return type == IR::Type::F32 || type == IR::Type::F64; +} + +bool IsConditionalOpcode(IR::Opcode opcode) { + switch (opcode) { + case IR::Opcode::FPOrdEqual32: + case IR::Opcode::FPOrdEqual64: + case IR::Opcode::FPUnordEqual32: + case IR::Opcode::FPUnordEqual64: + case IR::Opcode::FPOrdNotEqual32: + case IR::Opcode::FPOrdNotEqual64: + case IR::Opcode::FPUnordNotEqual32: + case IR::Opcode::FPUnordNotEqual64: + case IR::Opcode::FPOrdLessThan32: + case IR::Opcode::FPOrdLessThan64: + case IR::Opcode::FPUnordLessThan32: + case IR::Opcode::FPUnordLessThan64: + case IR::Opcode::FPOrdGreaterThan32: + case IR::Opcode::FPOrdGreaterThan64: + case IR::Opcode::FPUnordGreaterThan32: + case IR::Opcode::FPUnordGreaterThan64: + case IR::Opcode::FPOrdLessThanEqual32: + case IR::Opcode::FPOrdLessThanEqual64: + case IR::Opcode::FPUnordLessThanEqual32: + case IR::Opcode::FPUnordLessThanEqual64: + case IR::Opcode::FPOrdGreaterThanEqual32: + case IR::Opcode::FPOrdGreaterThanEqual64: + case IR::Opcode::FPUnordGreaterThanEqual32: + case IR::Opcode::FPUnordGreaterThanEqual64: + case IR::Opcode::FPIsNan32: + case IR::Opcode::FPIsNan64: + case IR::Opcode::FPIsInf32: + case IR::Opcode::FPIsInf64: + case IR::Opcode::FPCmpClass32: + case IR::Opcode::SLessThan32: + case IR::Opcode::SLessThan64: + case IR::Opcode::ULessThan32: + case IR::Opcode::ULessThan64: + case IR::Opcode::IEqual32: + case IR::Opcode::IEqual64: + case IR::Opcode::SLessThanEqual: + case IR::Opcode::ULessThanEqual: + case IR::Opcode::SGreaterThan: + case IR::Opcode::UGreaterThan: + case IR::Opcode::INotEqual32: + case IR::Opcode::INotEqual64: + case IR::Opcode::SGreaterThanEqual: + case IR::Opcode::UGreaterThanEqual: + return true; + default: + return false; + } +} + +size_t GetRegBytesOfType(IR::Type type) { + switch (type) { + case IR::Type::U1: + case IR::Type::U8: + return 1; + case IR::Type::U16: + case IR::Type::F16: + case IR::Type::F16x2: + case IR::Type::F16x3: + case IR::Type::F16x4: + return 2; + case IR::Type::U32: + case IR::Type::U32x2: + case IR::Type::U32x3: + case IR::Type::U32x4: + case IR::Type::F32: + case 
+size_t GetRegBytesOfType(IR::Type type) {
+    switch (type) {
+    case IR::Type::U1:
+    case IR::Type::U8:
+        return 1;
+    case IR::Type::U16:
+    case IR::Type::F16:
+    case IR::Type::F16x2:
+    case IR::Type::F16x3:
+    case IR::Type::F16x4:
+        return 2;
+    case IR::Type::U32:
+    case IR::Type::U32x2:
+    case IR::Type::U32x3:
+    case IR::Type::U32x4:
+    case IR::Type::F32:
+    case IR::Type::F32x2:
+    case IR::Type::F32x3:
+    case IR::Type::F32x4:
+    case IR::Type::ScalarReg:
+    case IR::Type::VectorReg:
+        return 4;
+    case IR::Type::U64:
+    case IR::Type::F64:
+    case IR::Type::F64x2:
+    case IR::Type::F64x3:
+    case IR::Type::F64x4:
+    case IR::Type::Attribute:
+    case IR::Type::Patch:
+        return 8;
+    default:
+        break;
+    }
+    UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(type));
+    return 0;
+}
+
+u8 GetNumComponentsOfType(IR::Type type) {
+    switch (type) {
+    case IR::Type::U1:
+    case IR::Type::U8:
+    case IR::Type::U16:
+    case IR::Type::F16:
+    case IR::Type::U32:
+    case IR::Type::F32:
+    case IR::Type::U64:
+    case IR::Type::F64:
+    case IR::Type::ScalarReg:
+    case IR::Type::VectorReg:
+    case IR::Type::Attribute:
+    case IR::Type::Patch:
+        return 1;
+    case IR::Type::U32x2:
+    case IR::Type::F32x2:
+    case IR::Type::F16x2:
+    case IR::Type::F64x2:
+        return 2;
+    case IR::Type::U32x3:
+    case IR::Type::F32x3:
+    case IR::Type::F16x3:
+    case IR::Type::F64x3:
+        return 3;
+    case IR::Type::U32x4:
+    case IR::Type::F32x4:
+    case IR::Type::F16x4:
+    case IR::Type::F64x4:
+        return 4;
+    default:
+        break;
+    }
+    UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(type));
+    return 0;
+}
+
+Reg ResizeRegToType(const Reg& reg, IR::Type type) {
+    ASSERT(reg.getKind() == Operand::Kind::REG);
+    switch (GetRegBytesOfType(type)) {
+    case 1:
+        return reg.cvt8();
+    case 2:
+        return reg.cvt16();
+    case 4:
+        return reg.cvt32();
+    case 8:
+        return reg.cvt64();
+    default:
+        break;
+    }
+    UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(type));
+    return reg;
+}
+
+void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
+    CodeGenerator& c = ctx.Code();
+    if (src.isMEM() && dst.isMEM()) {
+        Reg tmp = ctx.TempGPReg(false).cvt32();
+        c.mov(tmp, src);
+        c.mov(dst, tmp);
+    } else if (src.isMEM() && dst.isXMM()) {
+        c.movss(dst.getReg().cvt128(), src.getAddress());
+    } else if (src.isXMM() && dst.isMEM()) {
+        c.movss(dst.getAddress(), src.getReg().cvt128());
+    } else if (src.isXMM() && dst.isXMM()) {
+        c.movaps(dst.getReg().cvt128(), src.getReg().cvt128());
+    } else {
+        UNREACHABLE_MSG("Unsupported mov float {} {}", src.toString(), dst.toString());
+    }
+}
+
+void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
+    CodeGenerator& c = ctx.Code();
+    if (src.isMEM() && dst.isMEM()) {
+        const Reg64& tmp = ctx.TempGPReg(false);
+        c.mov(tmp, src);
+        c.mov(dst, tmp);
+    } else if (src.isMEM() && dst.isXMM()) {
+        c.movsd(dst.getReg().cvt128(), src.getAddress());
+    } else if (src.isXMM() && dst.isMEM()) {
+        c.movsd(dst.getAddress(), src.getReg().cvt128());
+    } else if (src.isXMM() && dst.isXMM()) {
+        c.movapd(dst.getReg().cvt128(), src.getReg().cvt128());
+    } else {
+        UNREACHABLE_MSG("Unsupported mov double {} {}", src.toString(), dst.toString());
+    }
+}
+
+void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
+    CodeGenerator& c = ctx.Code();
+    if (src.isMEM() && dst.isMEM()) {
+        const Reg64& tmp = ctx.TempGPReg(false);
+        c.mov(tmp, src);
+        c.mov(dst, tmp);
+    } else {
+        c.mov(dst, src);
+    }
+}
+
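All three Mov helpers above share one pattern: x86 has no memory-to-memory mov, so a copy between two spill slots must bounce through a scratch register. A standalone illustration in raw Xbyak (assuming, as the register allocator above does, that rax is the designated GP scratch and spill slots are rsp-relative):

    #include <xbyak/xbyak.h>
    using namespace Xbyak::util;

    // Copy one 64-bit spill slot to another; "mov m64, m64" does not encode,
    // so the value bounces through rax.
    void CopySpillSlot(Xbyak::CodeGenerator& c, int dst_off, int src_off) {
        c.mov(rax, qword[rsp + src_off]);
        c.mov(qword[rsp + dst_off], rax);
    }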
+void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) {
+    if (!src.IsImmediate()) {
+        const Operands& src_op = ctx.Def(src);
+        if (IsFloatingType(src.Type())) {
+            switch (GetRegBytesOfType(src.Type())) {
+            case 4:
+                for (size_t i = 0; i < src_op.size(); i++) {
+                    MovFloat(ctx, dst[i], src_op[i]);
+                }
+                break;
+            case 8:
+                for (size_t i = 0; i < src_op.size(); i++) {
+                    MovDouble(ctx, dst[i], src_op[i]);
+                }
+                break;
+            default:
+                UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(src.Type()));
+                break;
+            }
+        } else {
+            for (size_t i = 0; i < src_op.size(); i++) {
+                MovGP(ctx, dst[i], src_op[i]);
+            }
+        }
+    } else {
+        CodeGenerator& c = ctx.Code();
+        const bool is_mem = dst[0].isMEM();
+        Reg64& tmp = ctx.TempGPReg(false);
+        switch (src.Type()) {
+        case IR::Type::U1:
+            c.mov(is_mem ? tmp.cvt8() : dst[0], src.U1());
+            break;
+        case IR::Type::U8:
+            c.mov(is_mem ? tmp.cvt8() : dst[0], src.U8());
+            break;
+        case IR::Type::U16:
+            c.mov(is_mem ? tmp.cvt16() : dst[0], src.U16());
+            break;
+        case IR::Type::U32:
+            c.mov(is_mem ? tmp.cvt32() : dst[0], src.U32());
+            break;
+        case IR::Type::F32:
+            c.mov(tmp.cvt32(), std::bit_cast<u32>(src.F32()));
+            if (!is_mem) {
+                c.movd(dst[0].getReg().cvt128(), tmp.cvt32());
+                return;
+            }
+            break;
+        case IR::Type::U64:
+            c.mov(is_mem ? tmp : dst[0], src.U64());
+            break;
+        case IR::Type::F64:
+            c.mov(tmp, std::bit_cast<u64>(src.F64()));
+            if (!is_mem) {
+                c.movq(dst[0].getReg().cvt128(), tmp);
+                return;
+            }
+            break;
+        case IR::Type::ScalarReg:
+            c.mov(is_mem ? tmp.cvt32() : dst[0], std::bit_cast<u32>(src.ScalarReg()));
+            break;
+        case IR::Type::VectorReg:
+            c.mov(is_mem ? tmp.cvt32() : dst[0], std::bit_cast<u32>(src.VectorReg()));
+            break;
+        case IR::Type::Attribute:
+            c.mov(is_mem ? tmp : dst[0], std::bit_cast<u64>(src.Attribute()));
+            break;
+        case IR::Type::Patch:
+            c.mov(is_mem ? tmp : dst[0], std::bit_cast<u64>(src.Patch()));
+            break;
+        default:
+            UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(src.Type()));
+            break;
+        }
+        if (is_mem) {
+            c.mov(dst[0], tmp);
+        }
+    }
+}
+
+} // namespace Shader::Backend::X64
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.h b/src/shader_recompiler/backend/asm_x64/x64_utils.h
new file mode 100644
index 000000000..2d665653d
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/x64_utils.h
@@ -0,0 +1,23 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include
+#include
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+#include "shader_recompiler/ir/type.h"
+
+namespace Shader::Backend::X64 {
+
+bool IsFloatingType(IR::Type type);
+bool IsConditionalOpcode(IR::Opcode opcode);
+size_t GetRegBytesOfType(IR::Type type);
+u8 GetNumComponentsOfType(IR::Type type);
+Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, IR::Type type);
+void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
+void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
+void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
+void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src);
+
+} // namespace Shader::Backend::X64
\ No newline at end of file
From 2e6f3aae32606ad9fa04b6e2a4469e6972dfc209 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Tue, 1 Apr 2025 11:47:22 +0200
Subject: [PATCH 34/49] Better conditions

---
 .../backend/asm_x64/emit_x64.cpp | 35 ++++++++++++-------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
index d7d284cb8..cc25ee27b 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
@@ -12,14 +12,27 @@ namespace Shader::Backend::X64 {
 using namespace Xbyak;
 using namespace Xbyak::util;
 
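For context on the hunk below: a boolean in this backend is a byte that is either zero or non-zero, so branching on one is a test (which only sets flags) followed by jz/jnz. A tiny self-contained sketch of the pattern the new EmitCondition emits:

    using namespace Xbyak::util;

    // Branch to on_false when the byte-sized boolean in al is zero.
    void BranchOnBool(Xbyak::CodeGenerator& c, Xbyak::Label& on_false) {
        c.test(al, al); // ANDs the value with itself, setting only EFLAGS
        c.jz(on_false); // ZF=1 means the boolean was false
    }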
-static void TestCondition(EmitContext& ctx, const IR::Inst* ref) {
+static void EmitCondition(EmitContext& ctx, const IR::Inst* ref, Label& label, bool invert) {
     IR::Value cond = ref->Arg(0);
-    Operand& op = ctx.Def(cond)[0];
-    Reg8 tmp = op.isREG() ? op.getReg().cvt8() : ctx.TempGPReg(false).cvt8();
-    if (!op.isREG()) {
-        ctx.Code().mov(tmp, op);
+    if (cond.IsImmediate()) {
+        // If immediate, we evaluate at compile time
+        if (cond.U1() != invert) {
+            ctx.Code().jmp(label);
+        }
+    } else {
+        Operand& op = ctx.Def(cond)[0];
+        if (op.isREG()) {
+            Reg8 reg = op.getReg().cvt8();
+            ctx.Code().test(reg, reg);
+        } else {
+            ctx.Code().test(op, 0xff);
+        }
+        if (invert) {
+            ctx.Code().jz(label);
+        } else {
+            ctx.Code().jnz(label);
+        }
     }
-    ctx.Code().test(tmp, tmp);
 }
 
 template
@@ -120,22 +133,20 @@ static void Traverse(EmitContext& ctx, const IR::Program& program) {
         case IR::AbstractSyntaxNode::Type::If: {
             IR::Inst* ref = node.data.if_node.cond.InstRecursive();
             Label& merge = ctx.BlockLabel(node.data.if_node.merge);
-            TestCondition(ctx, ref);
-            c.jz(merge);
+            EmitCondition(ctx, ref, merge, true);
             break;
         }
         case IR::AbstractSyntaxNode::Type::Repeat: {
             IR::Inst* ref = node.data.repeat.cond.InstRecursive();
             Label& loop_header = ctx.BlockLabel(node.data.repeat.loop_header);
-            TestCondition(ctx, ref);
-            c.jnz(loop_header);
+            EmitCondition(ctx, ref, loop_header, false);
             break;
         }
         case IR::AbstractSyntaxNode::Type::Break: {
             IR::Inst* ref = node.data.break_node.cond.InstRecursive();
             Label& merge = ctx.BlockLabel(node.data.break_node.merge);
-            TestCondition(ctx, ref);
-            c.jz(merge);
+            EmitCondition(ctx, ref, merge, true);
             break;
         }
         case IR::AbstractSyntaxNode::Type::Return: {
From 20f7a7231ea942083df887673506c9d97f816dc6 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Thu, 3 Apr 2025 12:11:05 +0200
Subject: [PATCH 35/49] Implement some opcodes

---
 CMakeLists.txt                                |   9 +-
 .../backend/asm_x64/emit_x64.cpp              |  94 +++-
 .../backend/asm_x64/emit_x64_atomic.cpp       | 138 +++++
 .../backend/asm_x64/emit_x64_barrier.cpp      |  20 +
 .../asm_x64/emit_x64_bitwise_conversion.cpp   | 228 ++++++++
 .../backend/asm_x64/emit_x64_composite.cpp    | 242 +++++++++
 .../backend/asm_x64/emit_x64_condition.cpp    |   6 -
 .../backend/asm_x64/emit_x64_condition.h      |   8 -
 .../backend/asm_x64/emit_x64_image.cpp        |  62 +++
 .../backend/asm_x64/emit_x64_instructions.h   | 485 ++++++++++++++++++
 .../asm_x64/emit_x64_shared_memory.cpp        |  24 +
 .../backend/asm_x64/x64_emit_context.cpp      |   6 +-
 .../backend/asm_x64/x64_emit_context.h        |   2 +-
 .../spirv/emit_spirv_context_get_set.cpp      |   4 +
 .../backend/spirv/emit_spirv_instructions.h   |   2 +-
 .../backend/spirv/emit_spirv_special.cpp      |   4 -
 .../ir/compute_value/do_nop_functions.h       |   2 +-
 src/shader_recompiler/ir/ir_emitter.cpp       |   8 +-
 src/shader_recompiler/ir/ir_emitter.h         |   2 +-
 src/shader_recompiler/ir/microinstruction.cpp |   2 +-
 src/shader_recompiler/ir/opcodes.inc          |   3 +-
 .../passes/flatten_extended_userdata_pass.cpp |   2 +-
 22 files changed, 1312 insertions(+), 41 deletions(-)
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
 delete mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp
 delete mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_condition.h
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
 create mode 100644
src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a550a7a88..14ca4ded6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -899,8 +899,13 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h if (ARCHITECTURE STREQUAL "x86_64") set(SHADER_RECOMPILER ${SHADER_RECOMPILER} - src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp - src/shader_recompiler/backend/asm_x64/emit_x64_condition.h + src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h + src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp src/shader_recompiler/backend/asm_x64/emit_x64.cpp src/shader_recompiler/backend/asm_x64/emit_x64.h src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp index cc25ee27b..e128216fc 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp @@ -3,7 +3,7 @@ #include "common/func_traits.h" #include "shader_recompiler/backend/asm_x64/emit_x64.h" -#include "shader_recompiler/backend/asm_x64/emit_x64_condition.h" +#include "shader_recompiler/backend/asm_x64/emit_x64_instructions.h" #include "shader_recompiler/backend/asm_x64/x64_emit_context.h" #include "shader_recompiler/backend/asm_x64/x64_utils.h" @@ -12,6 +12,8 @@ namespace Shader::Backend::X64 { using namespace Xbyak; using namespace Xbyak::util; +namespace { + static void EmitCondition(EmitContext& ctx, const IR::Inst* ref, Label& label, bool invert) { IR::Value cond = ref->Arg(0); if (cond.IsImmediate()) { @@ -20,7 +22,7 @@ static void EmitCondition(EmitContext& ctx, const IR::Inst* ref, Label& label, b ctx.Code().jmp(label); } } else { - Operand& op = ctx.Def(cond)[0]; + const Operand& op = ctx.Def(cond.InstRecursive())[0]; if (op.isREG()) { Reg8 reg = op.getReg().cvt8(); ctx.Code().test(reg, reg); @@ -37,7 +39,7 @@ static void EmitCondition(EmitContext& ctx, const IR::Inst* ref, Label& label, b template ArgType Arg(EmitContext& ctx, const IR::Value& arg) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { return ctx.Def(arg); } else if constexpr (std::is_same_v) { return arg; @@ -62,7 +64,7 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { } template -static void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { using Traits = Common::FuncTraits; if constexpr (has_dest) { if constexpr (is_first_arg_inst) { @@ -82,7 +84,7 @@ static void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) } template -static void Invoke(EmitContext& ctx, IR::Inst* inst) { +void Invoke(EmitContext& ctx, IR::Inst* inst) { using Traits = Common::FuncTraits; static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); if constexpr (Traits::NUM_ARGS == 1) { @@ -101,7 +103,7 @@ static void Invoke(EmitContext& ctx, IR::Inst* inst) { } } -static void EmitInst(EmitContext& ctx, IR::Inst* inst) { +void EmitInst(EmitContext& ctx, IR::Inst* inst) { switch (inst->GetOpcode()) { #define 
OPCODE(name, result_type, ...) \ case IR::Opcode::name: \ @@ -112,7 +114,7 @@ static void EmitInst(EmitContext& ctx, IR::Inst* inst) { UNREACHABLE_MSG("Invalid opcode {}", inst->GetOpcode()); } -static void Traverse(EmitContext& ctx, const IR::Program& program) { +void Traverse(EmitContext& ctx, const IR::Program& program) { CodeGenerator& c = ctx.Code(); for (const IR::AbstractSyntaxNode& node : program.syntax_list) { ctx.ResetTempRegs(); @@ -164,6 +166,8 @@ static void Traverse(EmitContext& ctx, const IR::Program& program) { } } +} // Anonymous namespace + void EmitX64(const IR::Program& program, Xbyak::CodeGenerator& c) { EmitContext context(program, c); Traverse(context, program); @@ -171,4 +175,80 @@ void EmitX64(const IR::Program& program, Xbyak::CodeGenerator& c) { context.Epilogue(); } +void EmitPhi(EmitContext& ctx) { + +} + +void EmitVoid(EmitContext&) {} + +void EmitIdentity(EmitContext& ctx) { + throw NotImplementedException("Forward identity declaration"); +} + +void EmitConditionRef(EmitContext& ctx) { + +} + +void EmitReference(EmitContext&) {} + +void EmitPhiMove(EmitContext&) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetScc(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetExec(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetVcc(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetSccLo(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetVccLo(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetVccHi(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetM0(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetScc(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetExec(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetVcc(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetSccLo(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetVccLo(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetVccHi(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetM0(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + } // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp new file mode 100644 index 000000000..1b865fdf9 --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp @@ -0,0 +1,138 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" + +namespace Shader::Backend::X64 { + + +void EmitSharedAtomicIAdd32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicIAdd32"); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicUMax32"); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicSMax32"); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicUMin32"); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicSMin32"); +} + 
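The shared-memory atomics in this file are all stubs for now. On x64 the natural lowering, once the backend knows a base pointer for LDS, is a lock-prefixed read-modify-write; a sketch under that assumption (the shared_mem_base register and this helper are hypothetical, not part of the patch):

    // SharedAtomicIAdd32 as lock xadd: atomically adds value to the 32-bit
    // slot and leaves the old contents in value.
    void LowerSharedAtomicIAdd32(Xbyak::CodeGenerator& c, const Xbyak::Reg64& shared_mem_base,
                                 const Xbyak::Reg64& offset, const Xbyak::Reg32& value) {
        c.lock();
        c.xadd(Xbyak::util::dword[shared_mem_base + offset], value);
    }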
+void EmitSharedAtomicAnd32(EmitContext& ctx) {
+    throw NotImplementedException("SharedAtomicAnd32");
+}
+
+void EmitSharedAtomicOr32(EmitContext& ctx) {
+    throw NotImplementedException("SharedAtomicOr32");
+}
+
+void EmitSharedAtomicXor32(EmitContext& ctx) {
+    throw NotImplementedException("SharedAtomicXor32");
+}
+
+void EmitBufferAtomicIAdd32(EmitContext& ctx) {
+    throw NotImplementedException("BufferAtomicIAdd32");
+}
+
+void EmitBufferAtomicSMin32(EmitContext& ctx) {
+    throw NotImplementedException("BufferAtomicSMin32");
+}
+
+void EmitBufferAtomicUMin32(EmitContext& ctx) {
+    throw NotImplementedException("BufferAtomicUMin32");
+}
+
+void EmitBufferAtomicSMax32(EmitContext& ctx) {
+    throw NotImplementedException("BufferAtomicSMax32");
+}
+
+void EmitBufferAtomicUMax32(EmitContext& ctx) {
+    throw NotImplementedException("BufferAtomicUMax32");
+}
+
+void EmitBufferAtomicInc32(EmitContext& ctx) {
+    throw NotImplementedException("BufferAtomicInc32");
+}
+
+void EmitBufferAtomicDec32(EmitContext& ctx) {
+    throw NotImplementedException("BufferAtomicDec32");
+}
+
+void EmitBufferAtomicAnd32(EmitContext& ctx) {
+    throw NotImplementedException("BufferAtomicAnd32");
+}
+
+void EmitBufferAtomicOr32(EmitContext& ctx) {
+    throw NotImplementedException("BufferAtomicOr32");
+}
+
+void EmitBufferAtomicXor32(EmitContext& ctx) {
+    throw NotImplementedException("BufferAtomicXor32");
+}
+
+void EmitBufferAtomicSwap32(EmitContext& ctx) {
+    throw NotImplementedException("BufferAtomicSwap32");
+}
+
+void EmitImageAtomicIAdd32(EmitContext& ctx) {
+    throw NotImplementedException("ImageAtomicIAdd32");
+}
+
+void EmitImageAtomicSMin32(EmitContext& ctx) {
+    throw NotImplementedException("ImageAtomicSMin32");
+}
+
+void EmitImageAtomicUMin32(EmitContext& ctx) {
+    throw NotImplementedException("ImageAtomicUMin32");
+}
+
+void EmitImageAtomicSMax32(EmitContext& ctx) {
+    throw NotImplementedException("ImageAtomicSMax32");
+}
+
+void EmitImageAtomicUMax32(EmitContext& ctx) {
+    throw NotImplementedException("ImageAtomicUMax32");
+}
+
+void EmitImageAtomicInc32(EmitContext& ctx) {
+    throw NotImplementedException("ImageAtomicInc32");
+}
+
+void EmitImageAtomicDec32(EmitContext& ctx) {
+    throw NotImplementedException("ImageAtomicDec32");
+}
+
+void EmitImageAtomicAnd32(EmitContext& ctx) {
+    throw NotImplementedException("ImageAtomicAnd32");
+}
+
+void EmitImageAtomicOr32(EmitContext& ctx) {
+    throw NotImplementedException("ImageAtomicOr32");
+}
+
+void EmitImageAtomicXor32(EmitContext& ctx) {
+    throw NotImplementedException("ImageAtomicXor32");
+}
+
+void EmitImageAtomicExchange32(EmitContext& ctx) {
+    throw NotImplementedException("ImageAtomicExchange32");
+}
+
+void EmitDataAppend(EmitContext& ctx) {
+    throw NotImplementedException("DataAppend");
+}
+
+void EmitDataConsume(EmitContext& ctx) {
+    throw NotImplementedException("DataConsume");
+}
+
+} // namespace Shader::Backend::X64
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp
new file mode 100644
index 000000000..62df58ae9
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp
@@ -0,0 +1,20 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+
+namespace Shader::Backend::X64 {
+
+void EmitBarrier(EmitContext& ctx) {
+
+}
+
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx) {
+
+}
+
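The empty bodies are defensible here: x86's TSO memory model already keeps ordinary loads and stores ordered, and the recompiled shader is assumed to run on a single thread, so these barriers need no code. Should a full device-level fence ever be required, it is a single instruction:

    // Strongest x86 ordering: mfence serializes all earlier loads and stores
    // against all later ones.
    void EmitFullMemoryFence(Xbyak::CodeGenerator& c) {
        c.mfence();
    }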
+void EmitDeviceMemoryBarrier(EmitContext& ctx) {
+
+}
+}
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
new file mode 100644
index 000000000..def2974e2
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
@@ -0,0 +1,228 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+#include "shader_recompiler/backend/asm_x64/x64_utils.h"
+
+using namespace Xbyak;
+using namespace Xbyak::util;
+
+namespace Shader::Backend::X64 {
+
+void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    // We handle 16-bit floats in general purpose registers
+    MovGP(ctx, dest[0], src[0]);
+}
+
+void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    if (dest[0].isMEM() && src[0].isMEM()) {
+        Reg32 tmp = ctx.TempGPReg(false).getReg().cvt32();
+        ctx.Code().mov(tmp, src[0]);
+        ctx.Code().mov(dest[0], tmp);
+    } else if (src[0].isMEM()) {
+        ctx.Code().mov(dest[0], src[0]);
+    } else if (dest[0].isMEM()) {
+        ctx.Code().movd(dest[0].getAddress(), src[0].getReg().cvt128());
+    } else {
+        ctx.Code().movd(dword[rsp - 4], src[0].getReg().cvt128());
+        ctx.Code().mov(dest[0], dword[rsp - 4]);
+    }
+}
+
+void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    if (dest[0].isMEM() && src[0].isMEM()) {
+        Reg tmp = ctx.TempGPReg(false);
+        ctx.Code().mov(tmp, src[0]);
+        ctx.Code().mov(dest[0], tmp);
+    } else if (src[0].isMEM()) {
+        ctx.Code().mov(dest[0], src[0]);
+    } else if (dest[0].isMEM()) {
+        ctx.Code().movq(dest[0].getAddress(), src[0].getReg().cvt128());
+    } else {
+        ctx.Code().movq(qword[rsp - 8], src[0].getReg().cvt128());
+        ctx.Code().mov(dest[0], qword[rsp - 8]);
+    }
+}
+
+void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    MovGP(ctx, dest[0], src[0]);
+}
+
+void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    if (dest[0].isMEM() && src[0].isMEM()) {
+        Reg32 tmp = ctx.TempGPReg(false).getReg().cvt32();
+        ctx.Code().mov(tmp, src[0]);
+        ctx.Code().mov(dest[0], tmp);
+    } else if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], src[0]);
+    } else if (src[0].isMEM()) {
+        ctx.Code().movd(dest[0].getReg().cvt128(), src[0].getAddress());
+    } else {
+        ctx.Code().mov(dword[rsp - 4], src[0]);
+        ctx.Code().movd(dest[0].getReg().cvt128(), dword[rsp - 4]);
+    }
+}
+
+void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    if (dest[0].isMEM() && src[0].isMEM()) {
+        Reg tmp = ctx.TempGPReg(false);
+        ctx.Code().mov(tmp, src[0]);
+        ctx.Code().mov(dest[0], tmp);
+    } else if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], src[0]);
+    } else if (src[0].isMEM()) {
+        ctx.Code().movq(dest[0].getReg().cvt128(), src[0].getAddress());
+    } else {
+        ctx.Code().mov(qword[rsp - 8], src[0].getReg());
+        ctx.Code().movq(dest[0].getReg().cvt128(), qword[rsp - 8]);
+    }
+}
+
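A note on the four-way dispatch in the bitcasts above: the final (register-to-register) branch bounces through the stack only because plain mov cannot write an XMM register. movd/movq also accept a general-purpose register operand directly, which would avoid the memory round trip; a sketch of that alternative:

    // Direct GP<->XMM bitcasts, no stack traffic.
    void BitCastF32U32Direct(Xbyak::CodeGenerator& c, const Xbyak::Xmm& dst,
                             const Xbyak::Reg32& src) {
        c.movd(dst, src); // u32 bit pattern -> f32 register
    }
    void BitCastU64F64Direct(Xbyak::CodeGenerator& c, const Xbyak::Reg64& dst,
                             const Xbyak::Xmm& src) {
        c.movq(dst, src); // f64 bit pattern -> u64 register
    }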
+void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    const bool is_mem = dest[0].isMEM();
+    Reg tmp = is_mem ? ctx.TempGPReg() : dest[0].getReg().changeBit(64);
+    // src[1] forms the high dword, src[0] the low dword, matching UnpackUint2x32.
+    ctx.Code().mov(tmp.cvt32(), src[1]);
+    ctx.Code().shl(tmp, 32);
+    Reg tmp2 = ctx.TempGPReg(false);
+    ctx.Code().mov(tmp2.cvt32(), src[0]); // 32-bit mov zero-extends into the full register
+    ctx.Code().or_(tmp, tmp2);
+    if (is_mem) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg src0 = src[0].isMEM() ? ctx.TempGPReg() : src[0].getReg();
+    if (src[0].isMEM()) {
+        ctx.Code().mov(src0, src[0]);
+    }
+    Reg dest1 = dest[1].isMEM() ? ctx.TempGPReg(false) : dest[1].getReg().changeBit(64);
+    ctx.Code().mov(dest1, src0);
+    ctx.Code().shr(dest1, 32);
+    if (dest[1].isMEM()) {
+        ctx.Code().mov(dest[1], dest1.cvt32());
+    }
+    ctx.Code().mov(dest[0], src0.cvt32());
+}
+
+void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    MovFloat(ctx, tmp, src[0]);
+    if (src[1].isXMM()) {
+        // pinsrd cannot take an XMM source; insertps moves lane 0 of src into lane 1.
+        ctx.Code().insertps(tmp, src[1].getReg().cvt128(), 0x10);
+    } else {
+        ctx.Code().pinsrd(tmp, src[1], 1);
+    }
+    if (dest[0].isMEM()) {
+        // The packed result is 64 bits wide; movsd stores both lanes.
+        ctx.Code().movsd(dest[0].getAddress(), tmp);
+    }
+}
+
+void EmitPackUnorm2x16(EmitContext& ctx) {
+    throw NotImplementedException("PackUnorm2x16");
+}
+
+void EmitUnpackUnorm2x16(EmitContext& ctx) {
+    throw NotImplementedException("UnpackUnorm2x16");
+}
+
+void EmitPackSnorm2x16(EmitContext& ctx) {
+    throw NotImplementedException("PackSnorm2x16");
+}
+
+void EmitUnpackSnorm2x16(EmitContext& ctx) {
+    throw NotImplementedException("UnpackSnorm2x16");
+}
+
+void EmitPackUint2x16(EmitContext& ctx) {
+    throw NotImplementedException("PackUint2x16");
+}
+
+void EmitUnpackUint2x16(EmitContext& ctx) {
+    throw NotImplementedException("UnpackUint2x16");
+}
+
+void EmitPackSint2x16(EmitContext& ctx) {
+    throw NotImplementedException("PackSint2x16");
+}
+
+void EmitUnpackSint2x16(EmitContext& ctx) {
+    throw NotImplementedException("UnpackSint2x16");
+}
+
+void EmitPackHalf2x16(EmitContext& ctx) {
+    throw NotImplementedException("PackHalf2x16");
+}
+
+void EmitUnpackHalf2x16(EmitContext& ctx) {
+    throw NotImplementedException("UnpackHalf2x16");
+}
+
+void EmitPackUnorm4x8(EmitContext& ctx) {
+    throw NotImplementedException("PackUnorm4x8");
+}
+
+void EmitUnpackUnorm4x8(EmitContext& ctx) {
+    throw NotImplementedException("UnpackUnorm4x8");
+}
+
+void EmitPackSnorm4x8(EmitContext& ctx) {
+    throw NotImplementedException("PackSnorm4x8");
+}
+
+void EmitUnpackSnorm4x8(EmitContext& ctx) {
+    throw NotImplementedException("UnpackSnorm4x8");
+}
+
+void EmitPackUint4x8(EmitContext& ctx) {
+    throw NotImplementedException("PackUint4x8");
+}
+
+void EmitUnpackUint4x8(EmitContext& ctx) {
+    throw NotImplementedException("UnpackUint4x8");
+}
+
+void EmitPackSint4x8(EmitContext& ctx) {
+    throw NotImplementedException("PackSint4x8");
+}
+
+void EmitUnpackSint4x8(EmitContext& ctx) {
+    throw NotImplementedException("UnpackSint4x8");
+}
+
+void EmitPackUfloat10_11_11(EmitContext& ctx) {
+    throw NotImplementedException("PackUfloat10_11_11");
+}
+
+void EmitUnpackUfloat10_11_11(EmitContext& ctx) {
+    throw NotImplementedException("UnpackUfloat10_11_11");
+}
+
+void EmitPackUnorm2_10_10_10(EmitContext& ctx) {
+    throw NotImplementedException("PackUnorm2_10_10_10");
+}
+
+void EmitUnpackUnorm2_10_10_10(EmitContext& ctx) {
+    throw NotImplementedException("UnpackUnorm2_10_10_10");
+}
+
+void EmitPackSnorm2_10_10_10(EmitContext& ctx) {
+    throw NotImplementedException("PackSnorm2_10_10_10");
+}
+
+void EmitUnpackSnorm2_10_10_10(EmitContext& ctx) {
+    throw NotImplementedException("UnpackSnorm2_10_10_10");
+}
+
+void EmitPackUint2_10_10_10(EmitContext& ctx) {
+    throw NotImplementedException("PackUint2_10_10_10");
+}
+
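Several of these packed-format stubs collapse to a single instruction once implemented; PackHalf2x16, for example, is exactly one vcvtps2ph on any CPU with F16C. A sketch under that assumption (F16C availability would have to be checked at JIT time; this helper is illustrative, not part of the patch):

    // Convert the two low f32 lanes of src to f16 and leave them packed in the
    // low 32 bits of dst; immediate 0 selects round-to-nearest-even.
    void PackHalf2x16F16C(Xbyak::CodeGenerator& c, const Xbyak::Xmm& dst,
                          const Xbyak::Xmm& src) {
        c.vcvtps2ph(dst, src, 0);
    }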
+void EmitUnpackUint2_10_10_10(EmitContext& ctx) {
+    throw NotImplementedException("UnpackUint2_10_10_10");
+}
+
+void EmitPackSint2_10_10_10(EmitContext& ctx) {
+    throw NotImplementedException("PackSint2_10_10_10");
+}
+
+void EmitUnpackSint2_10_10_10(EmitContext& ctx) {
+    throw NotImplementedException("UnpackSint2_10_10_10");
+}
+
+}
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
new file mode 100644
index 000000000..d03516dec
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
@@ -0,0 +1,242 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/asm_x64/x64_utils.h"
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+
+namespace Shader::Backend::X64 {
+
+namespace {
+
+template <size_t N>
+static const Operand& GetShuffleOperand(const Operands& comp1, const Operands& comp2, u32 index) {
+    if (index < N) {
+        return comp1[index];
+    } else {
+        return comp2[index - N];
+    }
+}
+
+} // Anonymous namespace
+
+void EmitCompositeConstructU32x2(EmitContext& ctx, const Operands& dest, const Operands& src1,
+                                 const Operands& src2) {
+    MovGP(ctx, dest[0], src1[0]);
+    MovGP(ctx, dest[1], src2[0]);
+}
+
+void EmitCompositeConstructU32x3(EmitContext& ctx, const Operands& dest, const Operands& src1,
+                                 const Operands& src2, const Operands& src3) {
+    MovGP(ctx, dest[0], src1[0]);
+    MovGP(ctx, dest[1], src2[0]);
+    MovGP(ctx, dest[2], src3[0]);
+}
+
+void EmitCompositeConstructU32x4(EmitContext& ctx, const Operands& dest, const Operands& src1,
+                                 const Operands& src2, const Operands& src3,
+                                 const Operands& src4) {
+    MovGP(ctx, dest[0], src1[0]);
+    MovGP(ctx, dest[1], src2[0]);
+    MovGP(ctx, dest[2], src3[0]);
+    MovGP(ctx, dest[3], src4[0]);
+}
+
+void EmitCompositeConstructU32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1,
+                                   const Operands& src2) {
+    MovGP(ctx, dest[0], src1[0]);
+    MovGP(ctx, dest[1], src2[0]);
+    MovGP(ctx, dest[2], src1[1]);
+    MovGP(ctx, dest[3], src2[1]);
+}
+
+void EmitCompositeExtractU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite,
+                               u32 index) {
+    MovGP(ctx, dest[0], composite[index]);
+}
+
+void EmitCompositeExtractU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite,
+                               u32 index) {
+    MovGP(ctx, dest[0], composite[index]);
+}
+
+void EmitCompositeExtractU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite,
+                               u32 index) {
+    MovGP(ctx, dest[0], composite[index]);
+}
+
+void EmitCompositeInsertU32x2(EmitContext& ctx, const Operands& dest, const Operands& object,
+                              u32 index) {
+    MovGP(ctx, dest[index], object[0]);
+}
+
+void EmitCompositeInsertU32x3(EmitContext& ctx, const Operands& dest, const Operands& object,
+                              u32 index) {
+    MovGP(ctx, dest[index], object[0]);
+}
+
+void EmitCompositeInsertU32x4(EmitContext& ctx, const Operands& dest, const Operands& object,
+                              u32 index) {
+    MovGP(ctx, dest[index], object[0]);
+}
+
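The shuffles below are resolved entirely at compile time: each index i picks component i of the first composite when i < N, otherwise component i - N of the second, so every shuffle degenerates into N plain moves. A worked instance of the rule:

    // EmitCompositeShuffleU32x2(ctx, dest, a, b, /*idx1=*/3, /*idx2=*/0) emits:
    //   MovGP(ctx, dest[0], b[1]); // 3 >= 2, so composite2[3 - 2]
    //   MovGP(ctx, dest[1], a[0]); // 0 <  2, so composite1[0]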
+void EmitCompositeShuffleU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1,
+                               const Operands& composite2, u32 idx1, u32 idx2) {
+    MovGP(ctx, dest[0], GetShuffleOperand<2>(composite1, composite2, idx1));
+    MovGP(ctx, dest[1], GetShuffleOperand<2>(composite1, composite2, idx2));
+}
+
+void EmitCompositeShuffleU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1,
+                               const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) {
+    MovGP(ctx, dest[0], GetShuffleOperand<3>(composite1, composite2, idx1));
+    MovGP(ctx, dest[1], GetShuffleOperand<3>(composite1, composite2, idx2));
+    MovGP(ctx, dest[2], GetShuffleOperand<3>(composite1, composite2, idx3));
+}
+
+void EmitCompositeShuffleU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1,
+                               const Operands& composite2, u32 idx1, u32 idx2, u32 idx3,
+                               u32 idx4) {
+    MovGP(ctx, dest[0], GetShuffleOperand<4>(composite1, composite2, idx1));
+    MovGP(ctx, dest[1], GetShuffleOperand<4>(composite1, composite2, idx2));
+    MovGP(ctx, dest[2], GetShuffleOperand<4>(composite1, composite2, idx3));
+    MovGP(ctx, dest[3], GetShuffleOperand<4>(composite1, composite2, idx4));
+}
+
+Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
+    return EmitCompositeConstruct(ctx, inst, ctx.F16[2], e1, e2);
+}
+
+Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) {
+    return EmitCompositeConstruct(ctx, inst, ctx.F16[3], e1, e2, e3);
+}
+
+Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) {
+    return EmitCompositeConstruct(ctx, inst, ctx.F16[4], e1, e2, e3, e4);
+}
+
+Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
+}
+
+Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
+}
+
+Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
+}
+
+Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) {
+    return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index);
+}
+
+Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) {
+    return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index);
+}
+
+Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) {
+    return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
+}
+
+Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
+    return ctx.OpVectorShuffle(ctx.F16[2], composite1, composite2, comp0, comp1);
+}
+
+Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
+                             u32 comp2) {
+    return ctx.OpVectorShuffle(ctx.F16[3], composite1, composite2, comp0, comp1, comp2);
+}
+
+Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
+                             u32 comp2, u32 comp3) {
+    return ctx.OpVectorShuffle(ctx.F16[4], composite1, composite2, comp0, comp1, comp2, comp3);
+}
+
+Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
+    return EmitCompositeConstruct(ctx, inst, ctx.F32[2], e1, e2);
+}
+
+Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) {
+    return EmitCompositeConstruct(ctx, inst, ctx.F32[3], e1, e2, e3);
+}
+
+Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) {
+    return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2, e3, e4);
+}
+
+Id EmitCompositeConstructF32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
+    return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2);
+}
+
+Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
+    return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
+}
+
+Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) {
+    
return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index); +} + +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index); +} + +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); +} + +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F32[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F32[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F32[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + +void EmitCompositeConstructF64x2(EmitContext&) { + UNREACHABLE_MSG("SPIR-V Instruction"); +} + +void EmitCompositeConstructF64x3(EmitContext&) { + UNREACHABLE_MSG("SPIR-V Instruction"); +} + +void EmitCompositeConstructF64x4(EmitContext&) { + UNREACHABLE_MSG("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x2(EmitContext&) { + UNREACHABLE_MSG("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x3(EmitContext&) { + UNREACHABLE_MSG("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x4(EmitContext&) { + UNREACHABLE_MSG("SPIR-V Instruction"); +} + +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index); +} + +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index); +} + +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); +} + +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F64[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F64[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F64[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + +} \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp deleted file mode 100644 index 046454b6f..000000000 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp +++ /dev/null @@ -1,6 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "shader_recompiler/backend/asm_x64/emit_x64_condition.h" - -namespace Shader::Backend::X64 {} \ No newline at end of file diff --git 
a/src/shader_recompiler/backend/asm_x64/emit_x64_condition.h b/src/shader_recompiler/backend/asm_x64/emit_x64_condition.h
deleted file mode 100644
index 16d6093ea..000000000
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_condition.h
+++ /dev/null
@@ -1,8 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
-
-namespace Shader::Backend::X64 {}
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
new file mode 100644
index 000000000..bc0e436e1
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
@@ -0,0 +1,62 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+
+namespace Shader::Backend::X64 {
+
+void EmitImageSampleRaw(EmitContext& ctx) {
+    // We can reach this point because the resource tracking pass has not been run yet.
+    throw NotImplementedException("ImageSampleRaw");
+}
+
+void EmitImageSampleImplicitLod(EmitContext& ctx) {
+    throw NotImplementedException("ImageSampleImplicitLod");
+}
+
+void EmitImageSampleExplicitLod(EmitContext& ctx) {
+    throw NotImplementedException("ImageSampleExplicitLod");
+}
+
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx) {
+    throw NotImplementedException("ImageSampleDrefImplicitLod");
+}
+
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx) {
+    throw NotImplementedException("ImageSampleDrefExplicitLod");
+}
+
+void EmitImageGather(EmitContext& ctx) {
+    throw NotImplementedException("ImageGather");
+}
+
+void EmitImageGatherDref(EmitContext& ctx) {
+    throw NotImplementedException("ImageGatherDref");
+}
+
+void EmitImageQueryDimensions(EmitContext& ctx) {
+    throw NotImplementedException("ImageQueryDimensions");
+}
+
+void EmitImageQueryLod(EmitContext& ctx) {
+    throw NotImplementedException("ImageQueryLod");
+}
+
+void EmitImageGradient(EmitContext& ctx) {
+    throw NotImplementedException("ImageGradient");
+}
+
+void EmitImageRead(EmitContext& ctx) {
+    throw NotImplementedException("ImageRead");
+}
+
+void EmitImageWrite(EmitContext& ctx) {
+    throw NotImplementedException("ImageWrite");
+}
+
+void EmitCubeFaceIndex(EmitContext& ctx) {
+    throw NotImplementedException("CubeFaceIndex");
+}
+
+}
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
new file mode 100644
index 000000000..51970986d
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
@@ -0,0 +1,485 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include
+#include
+#include "common/types.h"
+
+namespace Shader::IR {
+enum class Attribute : u64;
+enum class ScalarReg : u32;
+enum class Patch : u64;
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::X64 {
+
+using Operands = boost::container::static_vector;
+
+class EmitContext;
+
+// Microinstruction emitters
+void EmitPhi(EmitContext& ctx);
+void EmitVoid(EmitContext& ctx);
+void EmitIdentity(EmitContext& ctx);
+void EmitConditionRef(EmitContext& ctx, const IR::Value& value);
+void EmitReference(EmitContext&);
+void EmitPhiMove(EmitContext&);
+void
EmitJoin(EmitContext& ctx); +void EmitGetScc(EmitContext& ctx); +void EmitGetExec(EmitContext& ctx); +void EmitGetVcc(EmitContext& ctx); +void EmitGetSccLo(EmitContext& ctx); +void EmitGetVccLo(EmitContext& ctx); +void EmitGetVccHi(EmitContext& ctx); +void EmitGetM0(EmitContext& ctx); +void EmitSetScc(EmitContext& ctx); +void EmitSetExec(EmitContext& ctx); +void EmitSetVcc(EmitContext& ctx); +void EmitSetSccLo(EmitContext& ctx); +void EmitSetVccLo(EmitContext& ctx); +void EmitSetVccHi(EmitContext& ctx); +void EmitSetM0(EmitContext& ctx); +void EmitFPCmpClass32(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitDiscard(EmitContext& ctx); +void EmitDiscardCond(EmitContext& ctx, Id condition); +void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2, Id arg3, Id arg4); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg); +void EmitSetUserData(EmitContext& ctx, const IR::Value& offset, const IR::Value& data); +void EmitGetThreadBitScalarReg(EmitContext& ctx); +void EmitSetThreadBitScalarReg(EmitContext& ctx); +void EmitGetScalarRegister(EmitContext& ctx); +void EmitSetScalarRegister(EmitContext& ctx); +void EmitGetVectorRegister(EmitContext& ctx); +void EmitSetVectorRegister(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetScc(EmitContext& ctx); +Id EmitReadConst(EmitContext& ctx, IR::Inst* inst); +Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index); +Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address); +void EmitStoreBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void 
EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +void EmitBufferAtomicIAdd32(EmitContext& ctx); +void EmitBufferAtomicSMin32(EmitContext& ctx); +void EmitBufferAtomicUMin32(EmitContext& ctx); +void EmitBufferAtomicSMax32(EmitContext& ctx); +void EmitBufferAtomicUMax32(EmitContext& ctx); +void EmitBufferAtomicInc32(EmitContext& ctx); +void EmitBufferAtomicDec32(EmitContext& ctx); +void EmitBufferAtomicAnd32(EmitContext& ctx); +void EmitBufferAtomicOr32(EmitContext& ctx); +void EmitBufferAtomicXor32(EmitContext& ctx); +void EmitBufferAtomicSwap32(EmitContext& ctx); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index); +Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); +Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index); +void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index); +Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, + Id comp_index); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetSampleMask(EmitContext& ctx, Id value); +void EmitSetFragDepth(EmitContext& ctx, Id value); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitInvocationId(EmitContext& ctx); +Id EmitInvocationInfo(EmitContext& ctx); +Id EmitSampleId(EmitContext& ctx); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadSharedU32(EmitContext& ctx); +void EmitLoadSharedU64(EmitContext& ctx); +void EmitWriteSharedU32(EmitContext& ctx); +void EmitWriteSharedU64(EmitContext& ctx); +void EmitSharedAtomicIAdd32(EmitContext& ctx); +void EmitSharedAtomicUMax32(EmitContext& ctx); +void EmitSharedAtomicSMax32(EmitContext& ctx); +void EmitSharedAtomicUMin32(EmitContext& ctx); +void EmitSharedAtomicSMin32(EmitContext& ctx); +void EmitSharedAtomicAnd32(EmitContext& ctx); +void EmitSharedAtomicOr32(EmitContext& ctx); +void EmitSharedAtomicXor32(EmitContext& ctx); +Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeConstructU32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); +Id 
EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeConstructF32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF16(EmitContext& 
ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitPackUnorm2x16(EmitContext& ctx);
+void EmitUnpackUnorm2x16(EmitContext& ctx);
+void EmitPackSnorm2x16(EmitContext& ctx);
+void EmitUnpackSnorm2x16(EmitContext& ctx);
+void EmitPackUint2x16(EmitContext& ctx);
+void EmitUnpackUint2x16(EmitContext& ctx);
+void EmitPackSint2x16(EmitContext& ctx);
+void EmitUnpackSint2x16(EmitContext& ctx);
+void EmitPackHalf2x16(EmitContext& ctx);
+void EmitUnpackHalf2x16(EmitContext& ctx);
+void EmitPackUnorm4x8(EmitContext& ctx);
+void EmitUnpackUnorm4x8(EmitContext& ctx);
+void EmitPackSnorm4x8(EmitContext& ctx);
+void EmitUnpackSnorm4x8(EmitContext& ctx);
+void EmitPackUint4x8(EmitContext& ctx);
+void EmitUnpackUint4x8(EmitContext& ctx);
+void EmitPackSint4x8(EmitContext& ctx);
+void EmitUnpackSint4x8(EmitContext& ctx);
+void EmitPackUfloat10_11_11(EmitContext& ctx);
+void EmitUnpackUfloat10_11_11(EmitContext& ctx);
+void EmitPackUnorm2_10_10_10(EmitContext& ctx);
+void EmitUnpackUnorm2_10_10_10(EmitContext& ctx);
+void EmitPackSnorm2_10_10_10(EmitContext& ctx);
+void EmitUnpackSnorm2_10_10_10(EmitContext& ctx);
+void EmitPackUint2_10_10_10(EmitContext& ctx);
+void EmitUnpackUint2_10_10_10(EmitContext& ctx);
+void EmitPackSint2_10_10_10(EmitContext& ctx);
+void EmitUnpackSint2_10_10_10(EmitContext& ctx);
+Id EmitFPAbs16(EmitContext& ctx, Id value);
+Id EmitFPAbs32(EmitContext& ctx, Id value);
+Id EmitFPAbs64(EmitContext& ctx, Id value);
+Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
+Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
+Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
+Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
+Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPNeg16(EmitContext& ctx, Id value);
+Id EmitFPNeg32(EmitContext& ctx, Id value);
+Id EmitFPNeg64(EmitContext& ctx, Id value);
+Id EmitFPSin(EmitContext& ctx, Id value);
+Id EmitFPCos(EmitContext& ctx, Id value);
+Id EmitFPExp2(EmitContext& ctx, Id value);
+Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp);
+Id EmitFPLog2(EmitContext& ctx, Id value);
+Id EmitFPRecip32(EmitContext& ctx, Id value);
+Id EmitFPRecip64(EmitContext& ctx, Id value);
+Id EmitFPRecipSqrt32(EmitContext& ctx, Id value);
+Id EmitFPRecipSqrt64(EmitContext& ctx, Id value);
+Id EmitFPSqrt(EmitContext& ctx, Id value);
+Id EmitFPSaturate16(EmitContext& ctx, Id value);
+Id EmitFPSaturate32(EmitContext& ctx, Id value);
+Id EmitFPSaturate64(EmitContext& ctx, Id value);
+Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPRoundEven16(EmitContext& ctx, Id value);
+Id EmitFPRoundEven32(EmitContext& ctx, Id value);
+Id EmitFPRoundEven64(EmitContext& ctx, Id value);
+Id EmitFPFloor16(EmitContext& ctx, Id value);
+Id EmitFPFloor32(EmitContext& ctx, Id value);
+Id EmitFPFloor64(EmitContext& ctx, Id value);
+Id EmitFPCeil16(EmitContext& ctx, Id value);
+Id EmitFPCeil32(EmitContext& ctx, Id value);
+Id EmitFPCeil64(EmitContext& ctx, Id value);
+Id EmitFPTrunc16(EmitContext& ctx, Id value);
+Id EmitFPTrunc32(EmitContext& ctx, Id value);
+Id EmitFPTrunc64(EmitContext& ctx, Id value);
+Id EmitFPFract32(EmitContext& ctx, Id value);
+Id EmitFPFract64(EmitContext& ctx, Id value);
+Id EmitFPFrexpSig32(EmitContext& ctx, Id value);
+Id EmitFPFrexpSig64(EmitContext& ctx, Id value);
+Id EmitFPFrexpExp32(EmitContext& ctx, Id value);
+Id EmitFPFrexpExp64(EmitContext& ctx, Id value);
+Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPIsNan16(EmitContext& ctx, Id value);
+Id EmitFPIsNan32(EmitContext& ctx, Id value);
+Id EmitFPIsNan64(EmitContext& ctx, Id value);
+Id EmitFPIsInf32(EmitContext& ctx, Id value);
+Id EmitFPIsInf64(EmitContext& ctx, Id value);
+Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
+Id EmitIAddCary32(EmitContext& ctx, Id a, Id b);
+Id EmitISub32(EmitContext& ctx, Id a, Id b);
+Id EmitISub64(EmitContext& ctx, Id a, Id b);
+Id EmitSMulExt(EmitContext& ctx, Id a, Id b);
+Id EmitUMulExt(EmitContext& ctx, Id a, Id b);
+Id EmitIMul32(EmitContext& ctx, Id a, Id b);
+Id EmitIMul64(EmitContext& ctx, Id a, Id b);
+Id EmitSDiv32(EmitContext& ctx, Id a, Id b);
+Id EmitUDiv32(EmitContext& ctx, Id a, Id b);
+Id EmitSMod32(EmitContext& ctx, Id a, Id b);
+Id EmitUMod32(EmitContext& ctx, Id a, Id b);
+Id EmitINeg32(EmitContext& ctx, Id value);
+Id EmitINeg64(EmitContext& ctx, Id value);
+Id EmitIAbs32(EmitContext& ctx, Id value);
+Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
+Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseAnd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count);
+Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
+Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
+Id EmitBitReverse32(EmitContext& ctx, Id value);
+Id EmitBitCount32(EmitContext& ctx, Id value);
+Id EmitBitCount64(EmitContext& ctx, Id value);
+Id EmitBitwiseNot32(EmitContext& ctx, Id value);
+Id EmitFindSMsb32(EmitContext& ctx, Id value);
+Id EmitFindUMsb32(EmitContext& ctx, Id value);
+Id EmitFindILsb32(EmitContext& ctx, Id value);
+Id EmitFindILsb64(EmitContext& ctx, Id value);
+Id EmitSMin32(EmitContext& ctx, Id a, Id b);
+Id EmitUMin32(EmitContext& ctx, Id a, Id b);
+Id EmitSMax32(EmitContext& ctx, Id a, Id b);
+Id EmitUMax32(EmitContext& ctx, Id a, Id b);
+Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
+Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
+Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitINotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalNot(EmitContext& ctx, Id value);
+Id EmitConvertS16F16(EmitContext& ctx, Id value);
+Id EmitConvertS16F32(EmitContext& ctx, Id value);
+Id EmitConvertS16F64(EmitContext& ctx, Id value);
+Id EmitConvertS32F16(EmitContext& ctx, Id value);
+Id EmitConvertS32F32(EmitContext& ctx, Id value);
+Id EmitConvertS32F64(EmitContext& ctx, Id value);
+Id EmitConvertS64F16(EmitContext& ctx, Id value);
+Id EmitConvertS64F32(EmitContext& ctx, Id value);
+Id EmitConvertS64F64(EmitContext& ctx, Id value);
+Id EmitConvertU16F16(EmitContext& ctx, Id value);
+Id EmitConvertU16F32(EmitContext& ctx, Id value);
+Id EmitConvertU16F64(EmitContext& ctx, Id value);
+Id EmitConvertU32F16(EmitContext& ctx, Id value);
+Id EmitConvertU32F32(EmitContext& ctx, Id value);
+Id EmitConvertU32F64(EmitContext& ctx, Id value);
+Id EmitConvertU64F16(EmitContext& ctx, Id value);
+Id EmitConvertU64F32(EmitContext& ctx, Id value);
+Id EmitConvertU64F64(EmitContext& ctx, Id value);
+Id EmitConvertU64U32(EmitContext& ctx, Id value);
+Id EmitConvertU32U64(EmitContext& ctx, Id value);
+Id EmitConvertF16F32(EmitContext& ctx, Id value);
+Id EmitConvertF32F16(EmitContext& ctx, Id value);
+Id EmitConvertF32F64(EmitContext& ctx, Id value);
+Id EmitConvertF64F32(EmitContext& ctx, Id value);
+Id EmitConvertF16S8(EmitContext& ctx, Id value);
+Id EmitConvertF16S16(EmitContext& ctx, Id value);
+Id EmitConvertF16S32(EmitContext& ctx, Id value);
+Id EmitConvertF16S64(EmitContext& ctx, Id value);
+Id EmitConvertF16U8(EmitContext& ctx, Id value);
+Id EmitConvertF16U16(EmitContext& ctx, Id value);
+Id EmitConvertF16U32(EmitContext& ctx, Id value);
+Id EmitConvertF16U64(EmitContext& ctx, Id value);
+Id EmitConvertF32S8(EmitContext& ctx, Id value);
+Id EmitConvertF32S16(EmitContext& ctx, Id value);
+Id EmitConvertF32S32(EmitContext& ctx, Id value);
+Id EmitConvertF32S64(EmitContext& ctx, Id value);
+Id EmitConvertF32U8(EmitContext& ctx, Id value);
+Id EmitConvertF32U16(EmitContext& ctx, Id value);
+Id EmitConvertF32U32(EmitContext& ctx, Id value);
+Id EmitConvertF32U64(EmitContext& ctx, Id value);
+Id EmitConvertF64S8(EmitContext& ctx, Id value);
+Id EmitConvertF64S16(EmitContext& ctx, Id value);
+Id EmitConvertF64S32(EmitContext& ctx, Id value);
+Id EmitConvertF64S64(EmitContext& ctx, Id value);
+Id EmitConvertF64U8(EmitContext& ctx, Id value);
+Id EmitConvertF64U16(EmitContext& ctx, Id value);
+Id EmitConvertF64U32(EmitContext& ctx, Id value);
+Id EmitConvertF64U64(EmitContext& ctx, Id value);
+Id EmitConvertU16U32(EmitContext& ctx, Id value);
+Id EmitConvertU32U16(EmitContext& ctx, Id value);
+
+void EmitImageSampleRaw(EmitContext& ctx);
+void EmitImageSampleImplicitLod(EmitContext& ctx);
+void EmitImageSampleExplicitLod(EmitContext& ctx);
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx);
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx);
+void EmitImageGather(EmitContext& ctx);
+void EmitImageGatherDref(EmitContext& ctx);
+void EmitImageQueryDimensions(EmitContext& ctx);
+void EmitImageQueryLod(EmitContext& ctx);
+void EmitImageGradient(EmitContext& ctx);
+void EmitImageRead(EmitContext& ctx);
+void EmitImageWrite(EmitContext& ctx);
+
+void EmitImageAtomicIAdd32(EmitContext& ctx);
+void EmitImageAtomicSMin32(EmitContext& ctx);
+void EmitImageAtomicUMin32(EmitContext& ctx);
+void EmitImageAtomicSMax32(EmitContext& ctx);
+void EmitImageAtomicUMax32(EmitContext& ctx);
+void EmitImageAtomicInc32(EmitContext& ctx);
+void EmitImageAtomicDec32(EmitContext& ctx);
+void EmitImageAtomicAnd32(EmitContext& ctx);
+void EmitImageAtomicOr32(EmitContext& ctx);
+void EmitImageAtomicXor32(EmitContext& ctx);
+void EmitImageAtomicExchange32(EmitContext& ctx);
+Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
+Id EmitLaneId(EmitContext& ctx);
+Id EmitWarpId(EmitContext& ctx);
+Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
+Id EmitReadFirstLane(EmitContext& ctx, Id value);
+Id EmitReadLane(EmitContext& ctx, Id value, u32 lane);
+Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
+void EmitDataAppend(EmitContext& ctx);
+void EmitDataConsume(EmitContext& ctx);
+
+void EmitEmitVertex(EmitContext& ctx);
+void EmitEmitPrimitive(EmitContext& ctx);
+
+}
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
new file mode 100644
index 000000000..5957afd33
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
@@ -0,0 +1,24 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+
+namespace Shader::Backend::X64 {
+
+void EmitLoadSharedU32(EmitContext& ctx) {
+    throw NotImplementedException("LoadSharedU32");
+}
+
+void EmitLoadSharedU64(EmitContext& ctx) {
+    throw NotImplementedException("LoadSharedU64");
+}
+
+void EmitWriteSharedU32(EmitContext& ctx) {
+    throw NotImplementedException("WriteSharedU32");
+}
+
+void EmitWriteSharedU64(EmitContext& ctx) {
+    throw NotImplementedException("WriteSharedU64");
+}
+}
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
index 1c5d5c103..1b706eeeb 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
+++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
@@ -43,12 +43,12 @@ Xmm& EmitContext::TempXmmReg(bool reserve) {
         std::ranges::find(preserved_regs, reg) == preserved_regs.end()) {
         preserved_regs.push_back(reg);
         code.sub(rsp, 16);
-        code.movdqu(ptr[rsp], reg);
+        code.movups(ptr[rsp], reg);
     }
     return reg;
 }
 
-Operands EmitContext::Def(IR::Inst* inst) {
+const Operands& EmitContext::Def(IR::Inst* inst) {
     return inst_to_operands.at(inst);
 }
 
@@ -141,7 +141,7 @@ void EmitContext::Epilogue() {
     for (auto it = preserved_regs.rbegin(); it != preserved_regs.rend(); ++it) {
         Reg& reg = *it;
         if (reg.isMMX()) {
-            code.movdqu(reg.cvt128(), ptr[rsp]);
+            code.movups(reg.cvt128(), ptr[rsp]);
             code.add(rsp, 16);
         } else {
             code.pop(reg);
diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h
index 59e0f2822..c967f9295 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h
+++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h
@@ -41,7 +41,7 @@ public:
     [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true);
     [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true);
-    [[nodiscard]] Operands Def(IR::Inst* inst);
+    [[nodiscard]] const Operands& Def(IR::Inst* inst);
     [[nodiscard]] Operands Def(const IR::Value& value);
 
     [[nodiscard]] std::optional> PhiAssignments(IR::Block* block) const;
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 5b4f3c3c5..fa6d45b0d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -128,6 +128,10 @@ Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
     return ud_reg;
 }
 
+void EmitSetUserData(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
 void EmitGetThreadBitScalarReg(EmitContext& ctx) {
     UNREACHABLE_MSG("Unreachable instruction");
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index a8901d8f6..41f2d4514 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -47,12 +47,12 @@ void EmitPrologue(EmitContext& ctx);
 void EmitEpilogue(EmitContext& ctx);
 void EmitDiscard(EmitContext& ctx);
 void EmitDiscardCond(EmitContext& ctx, Id condition);
-void EmitStoreFlatbuf(EmitContext& ctx, const IR::Value& data, const IR::Value& offset);
 void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2, Id arg3, Id arg4);
 void EmitBarrier(EmitContext& ctx);
 void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
 void EmitDeviceMemoryBarrier(EmitContext& ctx);
 Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
+void EmitSetUserData(EmitContext& ctx);
 void EmitGetThreadBitScalarReg(EmitContext& ctx);
 void EmitSetThreadBitScalarReg(EmitContext& ctx);
 void EmitGetScalarRegister(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
index f48c76395..fe7bd3356 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
@@ -102,10 +102,6 @@ void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
     throw NotImplementedException("Geometry streams");
 }
 
-void EmitStoreFlatbuf(EmitContext& ctx, const IR::Value& data, const IR::Value& offset) {
-    UNREACHABLE_MSG("StoreFlatbuf not intended for SPIR-V");
-}
-
 void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id fmt, Id arg0, Id arg1, Id arg2, Id arg3) {
     IR::DebugPrintFlags flags = inst->Flags();
     std::array fmt_args = {arg0, arg1, arg2, arg3};
diff --git a/src/shader_recompiler/ir/compute_value/do_nop_functions.h b/src/shader_recompiler/ir/compute_value/do_nop_functions.h
index 8b88742a1..66b9833e9 100644
--- a/src/shader_recompiler/ir/compute_value/do_nop_functions.h
+++ b/src/shader_recompiler/ir/compute_value/do_nop_functions.h
@@ -17,7 +17,6 @@ NOP_FUNCTION(Prologue)
 NOP_FUNCTION(Epilogue)
 NOP_FUNCTION(Discard)
 NOP_FUNCTION(DiscardCond)
-NOP_FUNCTION(StoreFlatbuf)
 NOP_FUNCTION(DebugPrint)
 NOP_FUNCTION(ReadConst)
 
@@ -45,6 +44,7 @@ NOP_FUNCTION(SharedAtomicOr32)
 NOP_FUNCTION(SharedAtomicXor32)
 
 NOP_FUNCTION(GetUserData)
+NOP_FUNCTION(SetUserData)
 NOP_FUNCTION(GetThreadBitScalarReg)
 NOP_FUNCTION(SetThreadBitScalarReg)
 NOP_FUNCTION(GetScalarRegister)
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 77e12c30c..c696a4af5 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -143,6 +143,10 @@ U32 IREmitter::GetUserData(IR::ScalarReg reg) {
     return Inst(Opcode::GetUserData, reg);
 }
 
+void IREmitter::SetUserData(const U32& offset, const U32& data) {
+    Inst(Opcode::SetUserData, offset, data);
+}
+
 U1 IREmitter::GetThreadBitScalarReg(IR::ScalarReg reg) {
     ASSERT(static_cast<u32>(reg) < IR::NumScalarRegs);
     return Inst(Opcode::GetThreadBitScalarReg, reg);
@@ -1974,10 +1978,6 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32&
     return Inst(Opcode::CubeFaceIndex, cube_coords);
 }
 
-void IREmitter::StoreFlatbuf(const U32& data, const U32& offset) {
-    Inst(Opcode::StoreFlatbuf, data, offset);
-}
-
 // Debug print maps to SPIRV's NonSemantic DebugPrintf instruction
 // Renderdoc will hook in its own implementation of the SPIRV instruction
 // Renderdoc accepts format specifiers, e.g. %u, listed here:
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 14f06eef8..a95fbde25 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -48,7 +48,6 @@ public:
     void Epilogue();
     void Discard();
     void Discard(const U1& cond);
-    void StoreFlatbuf(const U32& data, const U32& offset);
     void DebugPrint(const char* fmt, boost::container::small_vector args);
 
     void Barrier();
@@ -56,6 +55,7 @@ public:
     void DeviceMemoryBarrier();
 
     [[nodiscard]] U32 GetUserData(IR::ScalarReg reg);
+    void SetUserData(const U32& offset, const U32& data);
     [[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg);
     void SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value);
diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp
index 45b0f3de0..9ff76e2ed 100644
--- a/src/shader_recompiler/ir/microinstruction.cpp
+++ b/src/shader_recompiler/ir/microinstruction.cpp
@@ -100,10 +100,10 @@ bool Inst::MayHaveSideEffects() const noexcept {
     case Opcode::ImageAtomicOr32:
     case Opcode::ImageAtomicXor32:
    case Opcode::ImageAtomicExchange32:
-    case Opcode::StoreFlatbuf:
     case Opcode::DebugPrint:
     case Opcode::EmitVertex:
     case Opcode::EmitPrimitive:
+    case Opcode::SetUserData:
         return true;
     default:
         return false;
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index f30c1ee67..ac9ff4196 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -14,7 +14,6 @@ OPCODE(Prologue, Void,
 OPCODE(Epilogue, Void, )
 OPCODE(Discard, Void, )
 OPCODE(DiscardCond, Void, U1, )
-OPCODE(StoreFlatbuf, Void, U32, U32 )
 OPCODE(DebugPrint, Void, StringLiteral, Opaque, Opaque, Opaque, Opaque, )
 
 // Constant memory operations
@@ -48,6 +47,8 @@ OPCODE(SharedAtomicXor32, U32, U32,
 
 // Context getters/setters
 OPCODE(GetUserData, U32, ScalarReg, )
+// We don't use ScalarReg since we do arithmetic on the register index
+OPCODE(SetUserData, Void, U32, U32 )
 OPCODE(GetThreadBitScalarReg, U1, ScalarReg, )
 OPCODE(SetThreadBitScalarReg, Void, ScalarReg, U1, )
 OPCODE(GetScalarRegister, U32, ScalarReg, )
diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
index a2b9a0056..307e72b45 100644
--- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
+++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
@@ -159,7 +159,7 @@ static IR::Program GenerateSrtReadConstsSubProgram(IR::Program& program, PassInf
         }
         IR::IREmitter ir(*inst->GetParent(), ++IR::Block::InstructionList::s_iterator_to(*inst));
-        ir.StoreFlatbuf(IR::U32(inst), save_offset);
+        ir.SetUserData(save_offset, IR::U32(inst));
     }
     data.original_inst->SetFlags(1);
     IR::IREmitter ir(*data.original_inst);

From 35d3ee8ecdc5869627d7c360bec78c2ab00f0783 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Thu, 3 Apr 2025 18:52:02 +0200
Subject: [PATCH 36/49] Composite

---
 .../backend/asm_x64/emit_x64_composite.cpp   | 282 ++++++++++++------
 .../backend/asm_x64/emit_x64_instructions.h  | 108 ++++---
 2 files changed, 245 insertions(+), 145 deletions(-)

diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
index d03516dec..910fd2cec 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
@@ -6,9 +6,12 @@
 
 namespace Shader::Backend::X64 {
 
+using namespace Xbyak;
+using namespace Xbyak::util;
+
 namespace {
-template
+template
 static const Operand& GetSuffleOperand(const Operands& comp1, const Operands& comp2, u32 index) {
     if (index < N) {
         return comp1[index];
     }
@@ -55,16 +58,34 @@ void EmitCompositeExtractU32x4(EmitContext& ctx, const Operands& dest, const Ope
     MovGP(ctx, dest[0], composite[index]);
 }
 
-void EmitCompositeInsertU32x2(EmitContext& ctx, const Operands& dest, const Operands& object, u32 index) {
-    MovGP(ctx, dest[index], object[0]);
+void EmitCompositeInsertU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
+    if (index == 0) {
+        MovGP(ctx, dest[0], object[0]);
+        MovGP(ctx, dest[1], composite[1]);
+    } else {
+        MovGP(ctx, dest[0], composite[0]);
+        MovGP(ctx, dest[1], object[0]);
+    }
 }
 
-void EmitCompositeInsertU32x3(EmitContext& ctx, const Operands& dest, const Operands& object, u32 index) {
-    MovGP(ctx, dest[index], object[0]);
+void EmitCompositeInsertU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
+    for (u32 i = 0; i < 3; ++i) {
+        if (i == index) {
+            MovGP(ctx, dest[i], object[0]);
+        } else {
+            MovGP(ctx, dest[i], composite[i]);
+        }
+    }
 }
 
-void EmitCompositeInsertU32x4(EmitContext& ctx, const Operands& dest, const Operands& object, u32 index) {
-    MovGP(ctx, dest[index], object[0]);
+void EmitCompositeInsertU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
+    for (u32 i = 0; i < 4; ++i) {
+        if (i == index) {
+            MovGP(ctx, dest[i], object[0]);
+        } else {
+            MovGP(ctx, dest[i], composite[i]);
+        }
+    }
 }
 
 void EmitCompositeShuffleU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2) {
@@ -85,158 +106,245 @@ void EmitCompositeShuffleU32x4(EmitContext& ctx, const Operands& dest, const Ope
     MovGP(ctx, dest[3], GetSuffleOperand<4>(composite1, composite2, idx4));
 }
 
-Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
-    return EmitCompositeConstruct(ctx, inst, ctx.F16[2], e1, e2);
+void EmitCompositeConstructF16x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2) {
+    MovGP(ctx, dest[0], src1[0]);
+    MovGP(ctx, dest[1], src2[0]);
 }
 
-Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) {
-    return EmitCompositeConstruct(ctx, inst, ctx.F16[3], e1, e2, e3);
+void EmitCompositeConstructF16x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3) {
+    MovGP(ctx, dest[0], src1[0]);
+    MovGP(ctx, dest[1], src2[0]);
+    MovGP(ctx, dest[2], src3[0]);
 }
 
-Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) {
-    return EmitCompositeConstruct(ctx, inst, ctx.F16[4], e1, e2, e3, e4);
+void EmitCompositeConstructF16x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4) {
+    MovGP(ctx, dest[0], src1[0]);
+    MovGP(ctx, dest[1], src2[0]);
+    MovGP(ctx, dest[2], src3[0]);
+    MovGP(ctx, dest[3], src4[0]);
 }
 
-Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) {
-    return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
+void EmitCompositeExtractF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
+    MovGP(ctx, dest[0], composite[index]);
 }
 
-Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) {
-    return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
+void EmitCompositeExtractF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
+    MovGP(ctx, dest[0], composite[index]);
 }
 
-Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) {
-    return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
+void EmitCompositeExtractF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
+    MovGP(ctx, dest[0], composite[index]);
 }
 
-Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) {
-    return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index);
+void EmitCompositeInsertF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
+    if (index == 0) {
+        MovGP(ctx, dest[0], object[0]);
+        MovGP(ctx, dest[1], composite[1]);
+    } else {
+        MovGP(ctx, dest[0], composite[0]);
+        MovGP(ctx, dest[1], object[0]);
+    }
 }
 
-Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) {
-    return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index);
+void EmitCompositeInsertF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
+    for (u32 i = 0; i < 3; ++i) {
+        if (i == index) {
+            MovGP(ctx, dest[i], object[0]);
+        } else {
+            MovGP(ctx, dest[i], composite[i]);
+        }
+    }
 }
 
-Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) {
-    return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
+void EmitCompositeInsertF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
+    for (u32 i = 0; i < 4; ++i) {
+        if (i == index) {
+            MovGP(ctx, dest[i], object[0]);
+        } else {
+            MovGP(ctx, dest[i], composite[i]);
+        }
+    }
 }
 
-Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
-    return ctx.OpVectorShuffle(ctx.F16[2], composite1, composite2, comp0, comp1);
+void EmitCompositeShuffleF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2) {
+    MovGP(ctx, dest[0], GetSuffleOperand<2>(composite1, composite2, idx1));
+    MovGP(ctx, dest[1], GetSuffleOperand<2>(composite1, composite2, idx2));
 }
 
-Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2) {
-    return ctx.OpVectorShuffle(ctx.F16[3], composite1, composite2, comp0, comp1, comp2);
+void EmitCompositeShuffleF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) {
+    MovGP(ctx, dest[0], GetSuffleOperand<3>(composite1, composite2, idx1));
+    MovGP(ctx, dest[1], GetSuffleOperand<3>(composite1, composite2, idx2));
+    MovGP(ctx, dest[2], GetSuffleOperand<3>(composite1, composite2, idx3));
 }
 
-Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2, u32 comp3) {
-    return ctx.OpVectorShuffle(ctx.F16[4], composite1, composite2, comp0, comp1, comp2, comp3);
+void EmitCompositeShuffleF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4) {
+    MovGP(ctx, dest[0], GetSuffleOperand<4>(composite1, composite2, idx1));
+    MovGP(ctx, dest[1], GetSuffleOperand<4>(composite1, composite2, idx2));
+    MovGP(ctx, dest[2], GetSuffleOperand<4>(composite1, composite2, idx3));
+    MovGP(ctx, dest[3], GetSuffleOperand<4>(composite1, composite2, idx4));
 }
 
-Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
-    return EmitCompositeConstruct(ctx, inst, ctx.F32[2], e1, e2);
+void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2) {
+    MovFloat(ctx, dest[0], src1[0]);
+    MovFloat(ctx, dest[1], src2[0]);
 }
 
-Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) {
-    return EmitCompositeConstruct(ctx, inst, ctx.F32[3], e1, e2, e3);
+void EmitCompositeConstructF32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3) {
+    MovFloat(ctx, dest[0], src1[0]);
+    MovFloat(ctx, dest[1], src2[0]);
+    MovFloat(ctx, dest[2], src3[0]);
 }
 
-Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) {
-    return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2, e3, e4);
+void EmitCompositeConstructF32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4) {
+    MovFloat(ctx, dest[0], src1[0]);
+    MovFloat(ctx, dest[1], src2[0]);
+    MovFloat(ctx, dest[2], src3[0]);
+    MovFloat(ctx, dest[3], src4[0]);
 }
 
-Id EmitCompositeConstructF32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
-    return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2);
+void EmitCompositeConstructF32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2) {
+    MovFloat(ctx, dest[0], src1[0]);
+    MovFloat(ctx, dest[1], src2[0]);
+    MovFloat(ctx, dest[2], src1[1]);
+    MovFloat(ctx, dest[3], src2[1]);
 }
 
-Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
-    return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
+void EmitCompositeExtractF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
+    MovFloat(ctx, dest[0], composite[index]);
 }
 
-Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) {
-    return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
+void EmitCompositeExtractF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
+    MovFloat(ctx, dest[0], composite[index]);
 }
 
-Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) {
-    return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
+void EmitCompositeExtractF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
+    MovFloat(ctx, dest[0], composite[index]);
 }
 
-Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
-    return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index);
+void EmitCompositeInsertF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
+    if (index == 0) {
+        MovFloat(ctx, dest[0], object[0]);
+        MovFloat(ctx, dest[1], composite[1]);
+    } else {
+        MovFloat(ctx, dest[0], composite[0]);
+        MovFloat(ctx, dest[1], object[0]);
+    }
 }
 
-Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
-    return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index);
+void EmitCompositeInsertF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
+    for (u32 i = 0; i < 3; ++i) {
+        if (i == index) {
+            MovFloat(ctx, dest[i], object[0]);
+        } else {
+            MovFloat(ctx, dest[i], composite[i]);
+        }
+    }
 }
 
-Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
-    return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index);
+void EmitCompositeInsertF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
+    for (u32 i = 0; i < 4; ++i) {
+        if (i == index) {
+            MovFloat(ctx, dest[i], object[0]);
+        } else {
+            MovFloat(ctx, dest[i], composite[i]);
+        }
+    }
 }
 
-Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
-    return ctx.OpVectorShuffle(ctx.F32[2], composite1, composite2, comp0, comp1);
+void EmitCompositeShuffleF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2) {
+    MovFloat(ctx, dest[0], GetSuffleOperand<2>(composite1, composite2, idx1));
+    MovFloat(ctx, dest[1], GetSuffleOperand<2>(composite1, composite2, idx2));
 }
 
-Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2) {
-    return ctx.OpVectorShuffle(ctx.F32[3], composite1, composite2, comp0, comp1, comp2);
+void EmitCompositeShuffleF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) {
+    MovFloat(ctx, dest[0], GetSuffleOperand<3>(composite1, composite2, idx1));
+    MovFloat(ctx, dest[1], GetSuffleOperand<3>(composite1, composite2, idx2));
+    MovFloat(ctx, dest[2], GetSuffleOperand<3>(composite1, composite2, idx3));
 }
 
-Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2, u32 comp3) {
-    return ctx.OpVectorShuffle(ctx.F32[4], composite1, composite2, comp0, comp1, comp2, comp3);
+void EmitCompositeShuffleF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4) {
+    MovFloat(ctx, dest[0], GetSuffleOperand<4>(composite1, composite2, idx1));
+    MovFloat(ctx, dest[1], GetSuffleOperand<4>(composite1, composite2, idx2));
+    MovFloat(ctx, dest[2], GetSuffleOperand<4>(composite1, composite2, idx3));
+    MovFloat(ctx, dest[3], GetSuffleOperand<4>(composite1, composite2, idx4));
 }
 
-void EmitCompositeConstructF64x2(EmitContext&) {
-    UNREACHABLE_MSG("SPIR-V Instruction");
+void EmitCompositeConstructF64x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2) {
+    MovDouble(ctx, dest[0], src1[0]);
+    MovDouble(ctx, dest[1], src2[0]);
 }
 
-void EmitCompositeConstructF64x3(EmitContext&) {
-    UNREACHABLE_MSG("SPIR-V Instruction");
+void EmitCompositeConstructF64x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3) {
+    MovDouble(ctx, dest[0], src1[0]);
+    MovDouble(ctx, dest[1], src2[0]);
+    MovDouble(ctx, dest[2], src3[0]);
 }
 
-void EmitCompositeConstructF64x4(EmitContext&) {
-    UNREACHABLE_MSG("SPIR-V Instruction");
+void EmitCompositeConstructF64x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4) {
+    MovDouble(ctx, dest[0], src1[0]);
+    MovDouble(ctx, dest[1], src2[0]);
+    MovDouble(ctx, dest[2], src3[0]);
+    MovDouble(ctx, dest[3], src4[0]);
 }
 
-void EmitCompositeExtractF64x2(EmitContext&) {
-    UNREACHABLE_MSG("SPIR-V Instruction");
+void EmitCompositeExtractF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
+    MovDouble(ctx, dest[0], composite[index]);
 }
 
-void EmitCompositeExtractF64x3(EmitContext&) {
-    UNREACHABLE_MSG("SPIR-V Instruction");
+void EmitCompositeExtractF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
+    MovDouble(ctx, dest[0], composite[index]);
 }
 
-void EmitCompositeExtractF64x4(EmitContext&) {
-    UNREACHABLE_MSG("SPIR-V Instruction");
+void EmitCompositeExtractF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) {
+    MovDouble(ctx, dest[0], composite[index]);
 }
 
-Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
-    return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index);
+void EmitCompositeInsertF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
+    if (index == 0) {
+        MovDouble(ctx, dest[0], object[0]);
+        MovDouble(ctx, dest[1], composite[1]);
+    } else {
+        MovDouble(ctx, dest[0], composite[0]);
+        MovDouble(ctx, dest[1], object[0]);
+    }
 }
 
-Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) {
-    return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index);
+void EmitCompositeInsertF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
+    for (u32 i = 0; i < 3; ++i) {
+        if (i == index) {
+            MovDouble(ctx, dest[i], object[0]);
+        } else {
+            MovDouble(ctx, dest[i], composite[i]);
+        }
+    }
 }
 
-Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) {
-    return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
+void EmitCompositeInsertF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index) {
+    for (u32 i = 0; i < 4; ++i) {
+        if (i == index) {
+            MovDouble(ctx, dest[i], object[0]);
+        } else {
+            MovDouble(ctx, dest[i], composite[i]);
+        }
+    }
 }
 
-Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
-    return ctx.OpVectorShuffle(ctx.F64[2], composite1, composite2, comp0, comp1);
+void EmitCompositeShuffleF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2) {
+    MovDouble(ctx, dest[0], GetSuffleOperand<2>(composite1, composite2, idx1));
+    MovDouble(ctx, dest[1], GetSuffleOperand<2>(composite1, composite2, idx2));
 }
 
-Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2) {
-    return ctx.OpVectorShuffle(ctx.F64[3], composite1, composite2, comp0, comp1, comp2);
+void EmitCompositeShuffleF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) {
+    MovDouble(ctx, dest[0], GetSuffleOperand<3>(composite1, composite2, idx1));
+    MovDouble(ctx, dest[1], GetSuffleOperand<3>(composite1, composite2, idx2));
+    MovDouble(ctx, dest[2], GetSuffleOperand<3>(composite1, composite2, idx3));
 }
 
-Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2, u32 comp3) {
-    return ctx.OpVectorShuffle(ctx.F64[4], composite1, composite2, comp0, comp1, comp2, comp3);
+void EmitCompositeShuffleF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4) {
+    MovDouble(ctx, dest[0], GetSuffleOperand<4>(composite1, composite2, idx1));
+    MovDouble(ctx, dest[1], GetSuffleOperand<4>(composite1, composite2, idx2));
+    MovDouble(ctx, dest[2], GetSuffleOperand<4>(composite1, composite2, idx3));
+    MovDouble(ctx, dest[3], GetSuffleOperand<4>(composite1, composite2, idx4));
 }
 
 }
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
index 51970986d..6c086553b 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
@@ -132,64 +132,56 @@ void EmitSharedAtomicSMin32(EmitContext& ctx);
 void EmitSharedAtomicAnd32(EmitContext& ctx);
 void EmitSharedAtomicOr32(EmitContext& ctx);
 void EmitSharedAtomicXor32(EmitContext& ctx);
-Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
-Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
-Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
-Id EmitCompositeConstructU32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
-Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
-Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
-Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
-Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
-Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
-Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
-Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
-Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2);
-Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2, u32 comp3);
-Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
-Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
-Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
-Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
-Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
-Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
-Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
-Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
-Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
-Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
-Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2);
-Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2, u32 comp3);
-Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
-Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
-Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
-Id EmitCompositeConstructF32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
-Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
-Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
-Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
-Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
-Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
-Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
-Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
-Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2);
-Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2, u32 comp3);
-void EmitCompositeConstructF64x2(EmitContext& ctx);
-void EmitCompositeConstructF64x3(EmitContext& ctx);
-void EmitCompositeConstructF64x4(EmitContext& ctx);
-void EmitCompositeExtractF64x2(EmitContext& ctx);
-void EmitCompositeExtractF64x3(EmitContext& ctx);
-void EmitCompositeExtractF64x4(EmitContext& ctx);
-Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
-Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
-Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
-Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
-Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2);
-Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
-                             u32 comp2, u32 comp3);
+void EmitCompositeConstructU32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
+void EmitCompositeConstructU32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
+void EmitCompositeConstructU32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
+void EmitCompositeConstructU32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
+void EmitCompositeExtractU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
+void EmitCompositeExtractU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
+void EmitCompositeExtractU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
+void EmitCompositeInsertU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
+void EmitCompositeInsertU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
+void EmitCompositeInsertU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
+void EmitCompositeShuffleU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
+void EmitCompositeShuffleU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
+void EmitCompositeShuffleU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
+void EmitCompositeConstructF16x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
+void EmitCompositeConstructF16x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
+void EmitCompositeConstructF16x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
+void EmitCompositeExtractF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
+void EmitCompositeExtractF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
+void EmitCompositeExtractF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
+void EmitCompositeInsertF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
+void EmitCompositeInsertF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
+void EmitCompositeInsertF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
+void EmitCompositeShuffleF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
+void EmitCompositeShuffleF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
+void EmitCompositeShuffleF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
+void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
+void EmitCompositeConstructF32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
+void EmitCompositeConstructF32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
+void EmitCompositeConstructF32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
+void EmitCompositeExtractF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
+void EmitCompositeExtractF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
+void EmitCompositeExtractF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
+void EmitCompositeInsertF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
+void EmitCompositeInsertF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
+void EmitCompositeInsertF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
+void EmitCompositeShuffleF32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
+void EmitCompositeShuffleF32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
+void EmitCompositeShuffleF32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
+void EmitCompositeConstructF64x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
+void EmitCompositeConstructF64x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
+void EmitCompositeConstructF64x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
+void EmitCompositeExtractF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
+void EmitCompositeExtractF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
+void EmitCompositeExtractF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index);
+void EmitCompositeInsertF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
+void EmitCompositeInsertF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
+void EmitCompositeInsertF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite, const Operands& object, u32 index);
+void EmitCompositeShuffleF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
+void EmitCompositeShuffleF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
+void EmitCompositeShuffleF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
 Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value);
 Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
 Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);

From 2584ec2d76519ae0893dfe20fa1407fe509f4df1 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Thu, 3 Apr 2025 19:19:22 +0200
Subject: [PATCH 37/49] Copy context get set

---
 CMakeLists.txt                               |   1 +
 .../asm_x64/emit_x64_context_get_set.cpp     | 537 ++++++++++++++++++
 .../spirv/emit_spirv_context_get_set.cpp     |   1 -
 3 files changed, 538 insertions(+), 1 deletion(-)
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 14ca4ded6..ddef52f92 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -903,6 +903,7 @@ if (ARCHITECTURE STREQUAL "x86_64")
         src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp
         src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
         src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
+        src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
         src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
         src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
         src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
new file mode 100644
index 000000000..5486f0179
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
@@ -0,0 +1,537 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+
+namespace Shader::Backend::X64 {
+
+Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
+    const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg);
+    const u32 half = PushData::UdRegsIndex + (index >> 2);
+    const Id ud_ptr{ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
+                                      ctx.push_data_block, ctx.ConstU32(half),
+                                      ctx.ConstU32(index & 3))};
+    const Id ud_reg{ctx.OpLoad(ctx.U32[1], ud_ptr)};
+    ctx.Name(ud_reg, fmt::format("ud_{}", u32(reg)));
+    return ud_reg;
+}
+
+void EmitSetUserData(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitGetThreadBitScalarReg(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitSetThreadBitScalarReg(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitGetScalarRegister(EmitContext&) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitSetScalarRegister(EmitContext&) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitGetVectorRegister(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitSetVectorRegister(EmitContext& ctx) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitSetGotoVariable(EmitContext&) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+void EmitGetGotoVariable(EmitContext&) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
+using BufferAlias = EmitContext::BufferAlias;
+
+Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
+    const auto& srt_flatbuf = ctx.buffers.back();
+    ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
+    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.Def(inst->Arg(1)))};
+    return ctx.OpLoad(ctx.U32[1], ptr);
+}
+
+Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
+    const auto& buffer = ctx.buffers[handle];
+    index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
+    const auto [id, pointer_type] = buffer[BufferAlias::U32];
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
+    const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
+
+    if (Sirit::ValidId(buffer.size_dwords)) {
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
+        return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value);
+    } else {
+        return result;
+    }
+}
+
+Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
+    const auto index{rate_idx == 0 ? PushData::Step0Index : PushData::Step1Index};
+    return ctx.OpLoad(
+        ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
+                                      ctx.push_data_block, ctx.ConstU32(index)));
+}
+
+static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
+    if (IR::IsPosition(attr)) {
+        ASSERT(attr == IR::Attribute::Position0);
+        const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
+        const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
+        const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
+        return ctx.OpLoad(ctx.F32[1],
+                          ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
+    }
+
+    if (IR::IsParam(attr)) {
+        const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
+        const auto param = ctx.input_params.at(param_id).id;
+        const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
+        const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
+        const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
+        return ctx.OpLoad(ctx.F32[1],
+                          ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
+    }
+    UNREACHABLE();
+}
+
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
+    if (ctx.info.l_stage == LogicalStage::Geometry) {
+        return EmitGetAttributeForGeometry(ctx, attr, comp, index);
+    } else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
+               ctx.info.l_stage == LogicalStage::TessellationEval) {
+        if (IR::IsTessCoord(attr)) {
+            const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
+            const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
+            const auto pointer{
+                ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
+            return ctx.OpLoad(ctx.F32[1], pointer);
+        }
+        UNREACHABLE();
+    }
+
+    if (IR::IsParam(attr)) {
+        const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
+        const auto& param{ctx.input_params.at(index)};
+        if (param.buffer_handle >= 0) {
+            const auto step_rate = EmitReadStepRate(ctx, param.id.value);
+            const auto offset = ctx.OpIAdd(
+                ctx.U32[1],
+                ctx.OpIMul(
+                    ctx.U32[1],
+                    ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
+                    ctx.ConstU32(param.num_components)),
+                ctx.ConstU32(comp));
+            return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
+        }
+
+        Id result;
+        if (param.is_loaded) {
+            // Attribute is either default or manually interpolated. The id points to an already
+            // loaded vector.
+            result = ctx.OpCompositeExtract(param.component_type, param.id, comp);
+        } else if (param.num_components > 1) {
+            // Attribute is a vector and we need to access a specific component.
+            const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
+            result = ctx.OpLoad(param.component_type, pointer);
+        } else {
+            // Attribute is a single float or integer, simply load it.
+ result = ctx.OpLoad(param.component_type, param.id); + } + if (param.is_integer) { + result = ctx.OpBitcast(ctx.F32[1], result); + } + return result; + } + + switch (attr) { + case IR::Attribute::FragCoord: { + const Id coord = ctx.OpLoad( + ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp))); + if (comp == 3) { + return ctx.OpFDiv(ctx.F32[1], ctx.ConstF32(1.f), coord); + } + return coord; + } + case IR::Attribute::TessellationEvaluationPointU: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + case IR::Attribute::TessellationEvaluationPointV: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U))); + default: + UNREACHABLE_MSG("Read attribute {}", attr); + } +} + +Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { + switch (attr) { + case IR::Attribute::VertexId: + return ctx.OpLoad(ctx.U32[1], ctx.vertex_index); + case IR::Attribute::InstanceId: + return ctx.OpLoad(ctx.U32[1], ctx.instance_id); + case IR::Attribute::InstanceId0: + return EmitReadStepRate(ctx, 0); + case IR::Attribute::InstanceId1: + return EmitReadStepRate(ctx, 1); + case IR::Attribute::WorkgroupIndex: + return ctx.workgroup_index_id; + case IR::Attribute::WorkgroupId: + return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.workgroup_id), comp); + case IR::Attribute::LocalInvocationId: + return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id), + comp); + case IR::Attribute::IsFrontFace: + return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value, + ctx.u32_zero_value); + case IR::Attribute::PrimitiveId: + return ctx.OpLoad(ctx.U32[1], ctx.primitive_id); + case IR::Attribute::InvocationId: + ASSERT(ctx.info.l_stage == LogicalStage::Geometry || + ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); + case IR::Attribute::PatchVertices: + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices); + case IR::Attribute::PackedHullInvocationInfo: { + ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl); + // [0:8]: patch id within VGT + // [8:12]: output control point id + // But 0:8 should be treated as 0 for attribute addressing purposes + if (ctx.runtime_info.hs_info.IsPassthrough()) { + // Gcn shader would run with 1 thread, but we need to run a thread for + // each output control point. 
+            // If Gcn shader uses this value, we should make sure all threads in the
+            // Vulkan shader use 0
+            return ctx.ConstU32(0u);
+        } else {
+            const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
+            return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u));
+        }
+    }
+    default:
+        UNREACHABLE_MSG("Read U32 attribute {}", attr);
+    }
+}
+
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
+    if (attr == IR::Attribute::Position1) {
+        LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
+        return;
+    }
+    const Id pointer{OutputAttrPointer(ctx, attr, element)};
+    const auto component_type{OutputAttrComponentType(ctx, attr)};
+    if (component_type.second) {
+        ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));
+    } else {
+        ctx.OpStore(pointer, value);
+    }
+}
+
+Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) {
+    const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
+    return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.input_attr_array,
+                                                    vertex_index, attr_index, comp_index));
+}
+
+Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index,
+                                    Id comp_index) {
+    const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
+    return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.output_attr_array,
+                                                    vertex_index, attr_index, comp_index));
+}
+
+void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) {
+    // Implied vertex index is invocation_id
+    const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
+    Id pointer =
+        ctx.OpAccessChain(component_ptr, ctx.output_attr_array,
+                          ctx.OpLoad(ctx.U32[1], ctx.invocation_id), attr_index, comp_index);
+    ctx.OpStore(pointer, value);
+}
+
+Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
+    const u32 index{IR::GenericPatchIndex(patch)};
+    const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
+    const Id type{ctx.l_stage == LogicalStage::TessellationControl ? ctx.output_f32
+                                                                   : ctx.input_f32};
+    const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
+    return ctx.OpLoad(ctx.F32[1], pointer);
+}
+
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
+    const Id pointer{[&] {
+        if (IR::IsGeneric(patch)) {
+            const u32 index{IR::GenericPatchIndex(patch)};
+            const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
+            return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
+        }
+        switch (patch) {
+        case IR::Patch::TessellationLodLeft:
+        case IR::Patch::TessellationLodRight:
+        case IR::Patch::TessellationLodTop:
+        case IR::Patch::TessellationLodBottom: {
+            const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
+            const Id index_id{ctx.ConstU32(index)};
+            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
+        }
+        case IR::Patch::TessellationLodInteriorU:
+            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
+                                     ctx.u32_zero_value);
+        case IR::Patch::TessellationLodInteriorV:
+            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u));
+        default:
+            UNREACHABLE_MSG("Patch {}", u32(patch));
+        }
+    }()};
+    ctx.OpStore(pointer, value);
+}
+
+template <u32 N>
+static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, Id result,
+                                    bool is_float) {
+    if (Sirit::ValidId(buffer_size)) {
+        // Bounds checking enabled, wrap in a select.
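+        // In scalar terms, the guard built below is roughly:
+        //     value = (index + N - 1 < size_dwords) ? value : zero;
+        // OpSelect only masks the returned value; the loads themselves have
+        // already been emitted by the caller.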
+        const auto result_type = is_float ? ctx.F32[N] : ctx.U32[N];
+        auto compare_index = index;
+        auto zero_value = is_float ? ctx.f32_zero_value : ctx.u32_zero_value;
+        if (N > 1) {
+            compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
+            std::array<Id, N> zero_ids;
+            zero_ids.fill(zero_value);
+            zero_value = ctx.ConstantComposite(result_type, zero_ids);
+        }
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
+        return ctx.OpSelect(result_type, in_bounds, result, zero_value);
+    }
+    // Bounds checking not enabled, just return the plain value.
+    return result;
+}
+
+template <u32 N, BufferAlias alias>
+static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    const auto flags = inst->Flags<IR::BufferInstInfo>();
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
+    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
+    const auto [id, pointer_type] = spv_buffer[alias];
+
+    boost::container::static_vector<Id, N> ids;
+    for (u32 i = 0; i < N; i++) {
+        const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
+        const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
+        const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
+        if (!flags.typed) {
+            // Untyped loads have bounds checking per-component.
+            ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
+                                                       result_i, alias == BufferAlias::F32));
+        } else {
+            ids.push_back(result_i);
+        }
+    }
+
+    const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
+    if (flags.typed) {
+        // Typed loads have single bounds check for the whole load.
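+        // e.g. an x4 load emits one compare, index + 3 < size_dwords, guarding
+        // the whole composite instead of four per-component selects.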
+        return EmitLoadBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, result,
+                                            alias == BufferAlias::F32);
+    }
+    return result;
+}
+
+Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
+    const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
+    return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
+}
+
+Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
+    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
+    const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
+    return EmitLoadBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts, result, false);
+}
+
+Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
+}
+
+Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
+}
+
+Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
+}
+
+Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
+}
+
+Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
+}
+
+Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
+}
+
+Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
+}
+
+Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
+}
+
+Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    UNREACHABLE_MSG("SPIR-V instruction");
+}
+
+template <u32 N>
+void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
+    if (Sirit::ValidId(buffer_size)) {
+        // Bounds checking enabled, wrap in a conditional branch.
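+        // A store has no result that OpSelect could mask, so the side effect
+        // itself must be skipped; roughly: if (index + N - 1 < buffer_size) emit_func();
+        // realized below as an OpSelectionMerge/OpBranchConditional pair.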
+        auto compare_index = index;
+        if (N > 1) {
+            compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
+        }
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
+        const Id in_bounds_label = ctx.OpLabel();
+        const Id merge_label = ctx.OpLabel();
+        ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
+        ctx.OpBranchConditional(in_bounds, in_bounds_label, merge_label);
+        ctx.AddLabel(in_bounds_label);
+        emit_func();
+        ctx.OpBranch(merge_label);
+        ctx.AddLabel(merge_label);
+        return;
+    }
+    // Bounds checking not enabled, just perform the store.
+    emit_func();
+}
+
+template <u32 N, BufferAlias alias>
+static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
+                                 Id value) {
+    const auto flags = inst->Flags<IR::BufferInstInfo>();
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
+    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
+    const auto [id, pointer_type] = spv_buffer[alias];
+
+    auto store = [&] {
+        for (u32 i = 0; i < N; i++) {
+            const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
+            const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
+            const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
+            auto store_i = [&]() { ctx.OpStore(ptr_i, value_i); };
+            if (!flags.typed) {
+                // Untyped stores have bounds checking per-component.
+                EmitStoreBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords, store_i);
+            } else {
+                store_i();
+            }
+        }
+    };
+
+    if (flags.typed) {
+        // Typed stores have single bounds check for the whole store.
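+        // i.e. one branch guards all N component stores at once; the untyped
+        // path above instead wraps each component store individually.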
+        EmitStoreBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, store);
+    } else {
+        store();
+    }
+}
+
+void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
+    const Id result{ctx.OpUConvert(ctx.U8, value)};
+    EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); });
+}
+
+void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
+    const auto& spv_buffer = ctx.buffers[handle];
+    if (Sirit::ValidId(spv_buffer.offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    }
+    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
+    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
+    const Id result{ctx.OpUConvert(ctx.U16, value)};
+    EmitStoreBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts,
+                                  [&] { ctx.OpStore(ptr, result); });
+}
+
+void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value);
+}
+
+void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value);
+}
+
+void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value);
+}
+
+void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value);
+}
+
+void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value);
+}
+
+void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value);
+}
+
+void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value);
+}
+
+void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value);
+}
+
+void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+    UNREACHABLE_MSG("SPIR-V instruction");
+}
+
+}
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index fa6d45b0d..76617df9a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -168,7 +168,6 @@ using BufferAlias = EmitContext::BufferAlias;
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
     const auto& srt_flatbuf = ctx.buffers.back();
-    ASSERT_MSG(inst->Flags<u32>() == 1, "ReadConst was not processed by the flattening pass");
     ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
     const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.Def(inst->Arg(1)))};
     return ctx.OpLoad(ctx.U32[1], ptr);

From 3d971701dbadf27c2517401a29e23078bf4178fa Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Sat, 5 Apr 2025 02:59:01 +0200
Subject: [PATCH 38/49] Utils, context, convert and ctx get set

---
 CMakeLists.txt                                |   1 +
 .../asm_x64/emit_x64_context_get_set.cpp      | 506 +++----------
 .../backend/asm_x64/emit_x64_convert.cpp      | 455 ++++++++++++++++
 .../backend/asm_x64/emit_x64_instructions.h   | 152 +++---
 .../backend/asm_x64/x64_emit_context.h        |   2 +
 .../backend/asm_x64/x64_utils.cpp             |  14 +-
 6 files changed, 629 insertions(+), 501 deletions(-)
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ddef52f92..c8596f317 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -904,6 +904,7 @@ if (ARCHITECTURE STREQUAL "x86_64")
         src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
         src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
         src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
+        src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp
         src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
         src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
         src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
index 5486f0179..192570d8f 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
@@ -3,25 +3,26 @@
 
 #include "shader_recompiler/exception.h"
 #include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+#include "shader_recompiler/backend/asm_x64/x64_utils.h"
 
 namespace Shader::Backend::X64 {
 
 using namespace Xbyak;
 using namespace Xbyak::util;
 
-Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) {
-    const u32 index = ctx.binding.user_data + ctx.info.ud_mask.Index(reg);
-    const u32 half = PushData::UdRegsIndex + (index >> 2);
-    const Id ud_ptr{ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
-                                      ctx.push_data_block, ctx.ConstU32(half),
-                                      ctx.ConstU32(index & 3))};
-    const Id ud_reg{ctx.OpLoad(ctx.U32[1], ud_ptr)};
-    ctx.Name(ud_reg, fmt::format("ud_{}", u32(reg)));
-    return ud_reg;
+void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg) {
+    const u32 offset = static_cast<u32>(reg) << 2;
+    Reg& tmp = ctx.TempGPReg();
+    ctx.Code().lea(tmp, ptr[ctx.UserData() + offset]);
+    MovGP(ctx, dest[0], ptr[tmp]);
 }
 
-void EmitSetUserData(EmitContext& ctx) {
-    UNREACHABLE_MSG("Unreachable instruction");
+void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) {
+    Reg& tmp = ctx.TempGPReg();
+    ctx.Code().mov(tmp, offset[0]);
+    ctx.Code().shl(tmp, 2);
+    ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]);
+    MovGP(ctx, ptr[tmp], value[0]);
 }
 
 void EmitGetThreadBitScalarReg(EmitContext& ctx) {
@@ -56,482 +57,145 @@ void EmitGetGotoVariable(EmitContext&) {
     UNREACHABLE_MSG("Unreachable instruction");
 }
 
-using BufferAlias = EmitContext::BufferAlias;
-
-Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
-    const auto& srt_flatbuf = ctx.buffers.back();
-    ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
-    const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.Def(inst->Arg(1)))};
-    return ctx.OpLoad(ctx.U32[1], ptr);
-}
-
-Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
-    const auto& buffer = ctx.buffers[handle];
-    index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
-    const auto [id, pointer_type] = buffer[BufferAlias::U32];
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-    const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
-
-    if (Sirit::ValidId(buffer.size_dwords)) {
-        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
-        return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value);
+void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset) {
+    Reg& tmp = ctx.TempGPReg(false);
+    ctx.Code().mov(tmp, base[1]);
+    ctx.Code().shl(tmp, 32);
+    ctx.Code().or_(tmp, base[0]);
+    if (offset[0].isMEM()) {
+        ctx.Code().add(tmp, offset[0]);
     } else {
-        return result;
+        ctx.Code().lea(tmp, ptr[tmp + offset[0].getReg()]);
     }
+    MovGP(ctx, dest[0], ptr[tmp]);
 }
 
-Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
-    const auto index{rate_idx == 0 ? PushData::Step0Index : PushData::Step1Index};
-    return ctx.OpLoad(
-        ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
-                                      ctx.push_data_block, ctx.ConstU32(index)));
+void EmitReadConstBuffer(EmitContext& ctx) {
+    throw NotImplementedException("ReadConstBuffer");
 }
 
-static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
-    if (IR::IsPosition(attr)) {
-        ASSERT(attr == IR::Attribute::Position0);
-        const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
-        const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
-        const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
-        return ctx.OpLoad(ctx.F32[1],
-                          ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
-    }
-
-    if (IR::IsParam(attr)) {
-        const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
-        const auto param = ctx.input_params.at(param_id).id;
-        const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
-        const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
-        const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
-        return ctx.OpLoad(ctx.F32[1],
-                          ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
-    }
-    UNREACHABLE();
+void EmitReadStepRate(EmitContext& ctx) {
+    throw NotImplementedException("ReadStepRate");
 }
 
-Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
-    if (ctx.info.l_stage == LogicalStage::Geometry) {
-        return EmitGetAttributeForGeometry(ctx, attr, comp, index);
-    } else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
-               ctx.info.l_stage == LogicalStage::TessellationEval) {
-        if (IR::IsTessCoord(attr)) {
-            const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
-            const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
-            const auto pointer{
-                ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
-            return ctx.OpLoad(ctx.F32[1], pointer);
-        }
-        UNREACHABLE();
-    }
-
-    if (IR::IsParam(attr)) {
-        const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
-        const auto& param{ctx.input_params.at(index)};
-        if (param.buffer_handle >= 0) {
-            const auto step_rate = EmitReadStepRate(ctx, param.id.value);
-            const auto offset = ctx.OpIAdd(
-                ctx.U32[1],
-                ctx.OpIMul(
-                    ctx.U32[1],
-                    ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
-                    ctx.ConstU32(param.num_components)),
-                ctx.ConstU32(comp));
-            return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
-        }
-
-        Id result;
-        if (param.is_loaded) {
-            // Attribute is either default or manually interpolated. The id points to an already
-            // loaded vector.
-            result = ctx.OpCompositeExtract(param.component_type, param.id, comp);
-        } else if (param.num_components > 1) {
-            // Attribute is a vector and we need to access a specific component.
-            const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
-            result = ctx.OpLoad(param.component_type, pointer);
-        } else {
-            // Attribute is a single float or integer, simply load it.
-            result = ctx.OpLoad(param.component_type, param.id);
-        }
-        if (param.is_integer) {
-            result = ctx.OpBitcast(ctx.F32[1], result);
-        }
-        return result;
-    }
-
-    switch (attr) {
-    case IR::Attribute::FragCoord: {
-        const Id coord = ctx.OpLoad(
-            ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp)));
-        if (comp == 3) {
-            return ctx.OpFDiv(ctx.F32[1], ctx.ConstF32(1.f), coord);
-        }
-        return coord;
-    }
-    case IR::Attribute::TessellationEvaluationPointU:
-        return ctx.OpLoad(ctx.F32[1],
-                          ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
-    case IR::Attribute::TessellationEvaluationPointV:
-        return ctx.OpLoad(ctx.F32[1],
-                          ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
-    default:
-        UNREACHABLE_MSG("Read attribute {}", attr);
-    }
+void EmitGetAttribute(EmitContext& ctx) {
+    throw NotImplementedException("GetAttribute");
 }
 
-Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
-    switch (attr) {
-    case IR::Attribute::VertexId:
-        return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
-    case IR::Attribute::InstanceId:
-        return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
-    case IR::Attribute::InstanceId0:
-        return EmitReadStepRate(ctx, 0);
-    case IR::Attribute::InstanceId1:
-        return EmitReadStepRate(ctx, 1);
-    case IR::Attribute::WorkgroupIndex:
-        return ctx.workgroup_index_id;
-    case IR::Attribute::WorkgroupId:
-        return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.workgroup_id), comp);
-    case IR::Attribute::LocalInvocationId:
-        return ctx.OpCompositeExtract(ctx.U32[1], ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id),
-                                      comp);
-    case IR::Attribute::IsFrontFace:
-        return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
-                            ctx.u32_zero_value);
-    case IR::Attribute::PrimitiveId:
-        return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
-    case IR::Attribute::InvocationId:
-        ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
-               ctx.info.l_stage == LogicalStage::TessellationControl);
-        return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
-    case IR::Attribute::PatchVertices:
-        ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
-        return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices);
-    case IR::Attribute::PackedHullInvocationInfo: {
-        ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
-        // [0:8]: patch id within VGT
-        // [8:12]: output control point id
-        // But 0:8 should be treated as 0 for attribute addressing purposes
-        if (ctx.runtime_info.hs_info.IsPassthrough()) {
-            // Gcn shader would run with 1 thread, but we need to run a thread for
-            // each output control point.
-            // If Gcn shader uses this value, we should make sure all threads in the
-            // Vulkan shader use 0
-            return ctx.ConstU32(0u);
-        } else {
-            const Id invocation_id = ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
-            return ctx.OpShiftLeftLogical(ctx.U32[1], invocation_id, ctx.ConstU32(8u));
-        }
-    }
-    default:
-        UNREACHABLE_MSG("Read U32 attribute {}", attr);
-    }
+void EmitGetAttributeU32(EmitContext& ctx) {
+    throw NotImplementedException("GetAttributeU32");
 }
 
-void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
-    if (attr == IR::Attribute::Position1) {
-        LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
-        return;
-    }
-    const Id pointer{OutputAttrPointer(ctx, attr, element)};
-    const auto component_type{OutputAttrComponentType(ctx, attr)};
-    if (component_type.second) {
-        ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));
-    } else {
-        ctx.OpStore(pointer, value);
-    }
+void EmitSetAttribute(EmitContext& ctx) {
+    throw NotImplementedException("SetAttribute");
 }
 
-Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index) {
-    const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
-    return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.input_attr_array,
-                                                    vertex_index, attr_index, comp_index));
+void EmitGetTessGenericAttribute(EmitContext& ctx) {
+    throw NotImplementedException("GetTessGenericAttribute");
 }
 
-Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index,
-                                    Id comp_index) {
-    const auto attr_comp_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
-    return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(attr_comp_ptr, ctx.output_attr_array,
-                                                    vertex_index, attr_index, comp_index));
+void EmitReadTcsGenericOuputAttribute(EmitContext& ctx) {
+    throw NotImplementedException("ReadTcsGenericOuputAttribute");
}
 
-void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index) {
-    // Implied vertex index is invocation_id
-    const auto component_ptr = ctx.TypePointer(spv::StorageClass::Output, ctx.F32[1]);
-    Id pointer =
-        ctx.OpAccessChain(component_ptr, ctx.output_attr_array,
-                          ctx.OpLoad(ctx.U32[1], ctx.invocation_id), attr_index, comp_index);
-    ctx.OpStore(pointer, value);
+void EmitSetTcsGenericAttribute(EmitContext& ctx) {
+    throw NotImplementedException("SetTcsGenericAttribute");
 }
 
-Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
-    const u32 index{IR::GenericPatchIndex(patch)};
-    const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
-    const Id type{ctx.l_stage == LogicalStage::TessellationControl ? ctx.output_f32
-                                                                   : ctx.input_f32};
-    const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
-    return ctx.OpLoad(ctx.F32[1], pointer);
+void EmitGetPatch(EmitContext& ctx) {
+    throw NotImplementedException("GetPatch");
 }
 
-void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
-    const Id pointer{[&] {
-        if (IR::IsGeneric(patch)) {
-            const u32 index{IR::GenericPatchIndex(patch)};
-            const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
-            return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
-        }
-        switch (patch) {
-        case IR::Patch::TessellationLodLeft:
-        case IR::Patch::TessellationLodRight:
-        case IR::Patch::TessellationLodTop:
-        case IR::Patch::TessellationLodBottom: {
-            const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
-            const Id index_id{ctx.ConstU32(index)};
-            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
-        }
-        case IR::Patch::TessellationLodInteriorU:
-            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
-                                     ctx.u32_zero_value);
-        case IR::Patch::TessellationLodInteriorV:
-            return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u));
-        default:
-            UNREACHABLE_MSG("Patch {}", u32(patch));
-        }
-    }()};
-    ctx.OpStore(pointer, value);
+void EmitSetPatch(EmitContext& ctx) {
    throw NotImplementedException("SetPatch");
 }
 
-template <u32 N>
-static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, Id result,
-                                    bool is_float) {
-    if (Sirit::ValidId(buffer_size)) {
-        // Bounds checking enabled, wrap in a select.
-        const auto result_type = is_float ? ctx.F32[N] : ctx.U32[N];
-        auto compare_index = index;
-        auto zero_value = is_float ? ctx.f32_zero_value : ctx.u32_zero_value;
-        if (N > 1) {
-            compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
-            std::array<Id, N> zero_ids;
-            zero_ids.fill(zero_value);
-            zero_value = ctx.ConstantComposite(result_type, zero_ids);
-        }
-        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
-        return ctx.OpSelect(result_type, in_bounds, result, zero_value);
-    }
-    // Bounds checking not enabled, just return the plain value.
-    return result;
+void EmitLoadBufferU8(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferU8");
 }
 
-template <u32 N, BufferAlias alias>
-static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    const auto flags = inst->Flags<IR::BufferInstInfo>();
-    const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
-    }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
-    const auto [id, pointer_type] = spv_buffer[alias];
-
-    boost::container::static_vector<Id, N> ids;
-    for (u32 i = 0; i < N; i++) {
-        const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
-        const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
-        const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
-        if (!flags.typed) {
-            // Untyped loads have bounds checking per-component.
-            ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
-                                                       result_i, alias == BufferAlias::F32));
-        } else {
-            ids.push_back(result_i);
-        }
-    }
-
-    const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
-    if (flags.typed) {
-        // Typed loads have single bounds check for the whole load.
-        return EmitLoadBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, result,
-                                            alias == BufferAlias::F32);
-    }
-    return result;
+void EmitLoadBufferU16(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferU16");
 }
 
-Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
-    }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
-    const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
-    return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
+void EmitLoadBufferU32(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferU32");
 }
 
-Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
-    }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-    const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
-    return EmitLoadBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts, result, false);
+void EmitLoadBufferU32x2(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferU32x2");
 }
 
-Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
+void EmitLoadBufferU32x3(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferU32x3");
 }
 
-Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
+void EmitLoadBufferU32x4(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferU32x4");
 }
 
-Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
+void EmitLoadBufferF32(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferF32");
 }
 
-Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
+void EmitLoadBufferF32x2(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferF32x2");
 }
 
-Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
+void EmitLoadBufferF32x3(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferF32x3");
 }
 
-Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
+void EmitLoadBufferF32x4(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferF32x4");
 }
 
-Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
+void EmitLoadBufferFormatF32(EmitContext& ctx) {
+    throw NotImplementedException("LoadBufferFormatF32");
 }
 
-Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
+void EmitStoreBufferU8(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferU8");
 }
 
-Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    UNREACHABLE_MSG("SPIR-V instruction");
+void EmitStoreBufferU16(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferU16");
 }
 
-template <u32 N>
-void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
-    if (Sirit::ValidId(buffer_size)) {
-        // Bounds checking enabled, wrap in a conditional branch.
-        auto compare_index = index;
-        if (N > 1) {
-            compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
-        }
-        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
-        const Id in_bounds_label = ctx.OpLabel();
-        const Id merge_label = ctx.OpLabel();
-        ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
-        ctx.OpBranchConditional(in_bounds, in_bounds_label, merge_label);
-        ctx.AddLabel(in_bounds_label);
-        emit_func();
-        ctx.OpBranch(merge_label);
-        ctx.AddLabel(merge_label);
-        return;
-    }
-    // Bounds checking not enabled, just perform the store.
-    emit_func();
+void EmitStoreBufferU32(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferU32");
 }
 
-template <u32 N, BufferAlias alias>
-static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
-                                 Id value) {
-    const auto flags = inst->Flags<IR::BufferInstInfo>();
-    const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
-    }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
-    const auto [id, pointer_type] = spv_buffer[alias];
-
-    auto store = [&] {
-        for (u32 i = 0; i < N; i++) {
-            const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
-            const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
-            const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
-            auto store_i = [&]() { ctx.OpStore(ptr_i, value_i); };
-            if (!flags.typed) {
-                // Untyped stores have bounds checking per-component.
-                EmitStoreBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords, store_i);
-            } else {
-                store_i();
-            }
-        }
-    };
-
-    if (flags.typed) {
-        // Typed stores have single bounds check for the whole store.
-        EmitStoreBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, store);
-    } else {
-        store();
-    }
+void EmitStoreBufferU32x2(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferU32x2");
 }
 
-void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
-    }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
-    const Id result{ctx.OpUConvert(ctx.U8, value)};
-    EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); });
+void EmitStoreBufferU32x3(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferU32x3");
 }
 
-void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
-    const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
-    }
-    const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-    const Id result{ctx.OpUConvert(ctx.U16, value)};
-    EmitStoreBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts,
-                                  [&] { ctx.OpStore(ptr, result); });
+void EmitStoreBufferU32x4(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferU32x4");
 }
 
-void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value);
+void EmitStoreBufferF32(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferF32");
 }
 
-void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value);
+void EmitStoreBufferF32x2(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferF32x2");
 }
 
-void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value);
+void EmitStoreBufferF32x3(EmitContext& ctx) {
    throw NotImplementedException("StoreBufferF32x3");
 }
 
-void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value);
+void EmitStoreBufferF32x4(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferF32x4");
 }
 
-void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value);
-}
-
-void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value);
-}
-
-void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value);
-}
-
-void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value);
-}
-
-void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    UNREACHABLE_MSG("SPIR-V instruction");
+void EmitStoreBufferFormatF32(EmitContext& ctx) {
+    throw NotImplementedException("StoreBufferFormatF32");
 }
 
 }
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp
new file mode 100644
index 000000000..52726342e
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp
@@ -0,0 +1,455 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+#include "shader_recompiler/backend/asm_x64/x64_utils.h"
+
+namespace Shader::Backend::X64 {
+
+using namespace Xbyak;
+using namespace Xbyak::util;
+
+namespace {
+
+static void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) {
+    CodeGenerator& c = ctx.Code();
+    Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done;
+    Reg sign = ctx.TempGPReg().cvt32();
+    Reg exponent = ctx.TempGPReg().cvt32();
+    Reg mantissa = ctx.TempGPReg().cvt32();
+
+    c.movzx(mantissa, src);
+
+    // Extract sign, exponent, and mantissa
+    c.mov(sign, mantissa);
+    c.and_(sign, 0x8000);
+    c.shl(sign, 16);
+    c.mov(exponent, mantissa);
+    c.and_(exponent, 0x7C00);
+    c.shr(exponent, 10);
+    c.and_(mantissa, 0x03FF);
+
+    // Check for zero exponent and mantissa
+    c.test(exponent, exponent);
+    c.jnz(nonzero_exp);
+    c.test(mantissa, mantissa);
+    c.jz(zero_mantissa);
+
+    // Normalize subnormal number
+    c.mov(exponent, 1);
+    c.L(norm_loop);
+    c.test(mantissa, 0x400);
+    c.jnz(norm_done);
+    c.shl(mantissa, 1);
+    c.dec(exponent);
+    c.jmp(norm_loop);
+    c.L(norm_done);
+    c.and_(mantissa, 0x03FF);
+    c.jmp(normal);
+
+    // Zero mantissa, keep only the sign bit
+    c.L(zero_mantissa);
+    c.or_(mantissa, sign);
+    c.jmp(done);
+
+    // Non-zero exponent
+    c.L(nonzero_exp);
+    c.cmp(exponent, 0x1F);
+    c.jne(normal);
+
+    // Infinite or NaN
+    c.shl(mantissa, 13);
+    c.or_(mantissa, sign);
+    c.or_(mantissa, 0x7F800000);
+    c.jmp(done);
+
+    // Normal number
+    c.L(normal);
+    c.add(exponent, 112);
+    c.shl(exponent, 23);
+    c.shl(mantissa, 13);
+    c.or_(mantissa, sign);
+    c.or_(mantissa, exponent);
+
+    c.L(done);
+    if (dest.isMEM()) {
+        c.mov(dest, mantissa);
+    } else {
+        c.movd(dest.getReg().cvt128(), mantissa);
+    }
+}
+
+static void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) {
+    CodeGenerator& c = ctx.Code();
+    Label zero_exp, underflow, overflow, done;
+    Reg sign = ctx.TempGPReg().cvt32();
+    Reg exponent = ctx.TempGPReg().cvt32();
+    Reg mantissa = dest.isMEM() ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32();
+
+    if (src.isMEM()) {
+        c.mov(mantissa, src);
+    } else {
+        c.movd(mantissa, src.getReg().cvt128());
+    }
+
+    // Extract sign, exponent, and mantissa
+    c.mov(exponent, mantissa);
+    c.mov(sign, mantissa);
+    c.and_(exponent, 0x7F800000);
+    c.and_(mantissa, 0x007FFFFF);
+    c.shr(exponent, 23);
+    c.shl(mantissa, 3);
+    c.shr(sign, 16);
+    c.and_(sign, 0x8000);
+
+    // Subnormal numbers will be zero
+    c.test(exponent, exponent);
+    c.jz(zero_exp);
+
+    // Check for overflow and underflow
+    c.sub(exponent, 112);
+    c.cmp(exponent, 0);
+    c.jle(underflow);
+    c.cmp(exponent, 0x1F);
+    c.jge(overflow);
+
+    // Normal number
+    c.shl(exponent, 10);
+    c.shr(mantissa, 13);
+    c.or_(mantissa, exponent);
+    c.or_(mantissa, sign);
+    c.jmp(done);
+
+    // Underflow
+    c.L(underflow);
+    c.xor_(mantissa, mantissa);
+    c.jmp(done);
+
+    // Overflow
+    c.L(overflow);
+    c.mov(mantissa, 0x7C00);
+    c.or_(mantissa, sign);
+    c.jmp(done);
+
+    // Zero or subnormal value, keep only the sign bit
+    c.L(zero_exp);
+    c.mov(mantissa, sign);
+
+    c.L(done);
+    if (dest.isMEM()) {
+        c.mov(dest, mantissa);
+    } else {
+        c.and_(mantissa, 0xFFFF);
+    }
+}
+
+}
+
+void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp_xmm = ctx.TempXmmReg(false);
+    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
+    EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
+    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
+    ctx.Code().and_(tmp_reg, 0xFFFF);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp_reg.cvt16());
+    }
+}
+
+void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    ctx.Code().cvttss2si(tmp, src[0]);
+    ctx.Code().and_(tmp, 0xFFFF);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp.cvt16());
+    }
+}
+
+void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    ctx.Code().cvttsd2si(tmp, src[0]);
+    ctx.Code().and_(tmp, 0xFFFF);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp.cvt16());
+    }
+}
+
+void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp_xmm = ctx.TempXmmReg(false);
+    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
+    EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
+    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp_reg);
+    }
+}
+
+void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    ctx.Code().cvttss2si(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    ctx.Code().cvttsd2si(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp_xmm = ctx.TempXmmReg(false);
+    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg();
+    EmitInlineF16ToF32(ctx, tmp_xmm, src[0]);
+    ctx.Code().cvttss2si(tmp_reg, tmp_xmm);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp_reg);
+    }
+}
+
+void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
+    ctx.Code().cvttss2si(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
+    ctx.Code().cvttsd2si(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS16F16(ctx, dest, src);
+}
+
+void EmitConvertU16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS16F32(ctx, dest, src);
+}
+
+void EmitConvertU16F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS16F64(ctx, dest, src);
+}
+
+void EmitConvertU32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS32F16(ctx, dest, src);
+}
+
+void EmitConvertU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS32F32(ctx, dest, src);
+}
+
+void EmitConvertU32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS32F64(ctx, dest, src);
+}
+
+void EmitConvertU64F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS64F16(ctx, dest, src);
+}
+
+void EmitConvertU64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS64F32(ctx, dest, src);
+}
+
+void EmitConvertU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertS64F64(ctx, dest, src);
+}
+
+void EmitConvertU64U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    MovGP(ctx, dest[0], src[0]);
+}
+
+void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    MovGP(ctx, dest[0], src[0]);
+}
+
+void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitInlineF32ToF16(ctx, dest[0], src[0]);
+}
+
+void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitInlineF16ToF32(ctx, dest[0], src[0]);
+}
+
+void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().cvtsd2ss(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().cvtss2sd(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
+    Xmm tmp_xmm = ctx.TempXmmReg(false);
+    ctx.Code().movsx(tmp_reg, src[0]);
+    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
+    EmitInlineF32ToF16(ctx, dest[0], tmp_xmm);
+}
+
+void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32();
+    Xmm tmp_xmm = ctx.TempXmmReg(false);
+    ctx.Code().movsx(tmp_reg, src[0]);
+    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
+    EmitInlineF32ToF16(ctx, dest[0], tmp_xmm);
+}
+
+void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = ctx.TempXmmReg(false);
+    ctx.Code().cvtsi2ss(tmp, src[0]);
+    EmitInlineF32ToF16(ctx, dest[0], tmp);
+}
+
+void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = ctx.TempXmmReg(false);
+    ctx.Code().cvtsi2ss(tmp, src[0]);
+    EmitInlineF32ToF16(ctx, dest[0], tmp);
+}
+
+void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF16S8(ctx, dest, src);
+}
+
+void EmitConvertF16U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF16S16(ctx, dest, src);
+}
+
+void EmitConvertF16U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF16S32(ctx, dest, src);
+}
+
+void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF16S64(ctx, dest, src);
+}
+
+void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
+    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().movsx(tmp_reg, src[0]);
+    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp_xmm);
+    }
+}
+
+void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
+    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().movsx(tmp_reg, src[0]);
+    ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp_xmm);
+    }
+}
+
+void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().cvtsi2ss(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().cvtsi2ss(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF32S8(ctx, dest, src);
+}
+
+void EmitConvertF32U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF32S16(ctx, dest, src);
+}
+
+void EmitConvertF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF32S32(ctx, dest, src);
+}
+
+void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF32S64(ctx, dest, src);
+}
+
+void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
+    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().movsx(tmp_reg, src[0]);
+    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp_xmm);
+    }
+}
+
+void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp_reg = ctx.TempGPReg(false).cvt32();
+    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().movsx(tmp_reg, src[0]);
+    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp_xmm);
+    }
+}
+
+void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().cvtsi2sd(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().cvtsi2sd(tmp, src[0]);
+    if (dest[0].isMEM()) {
+        ctx.Code().mov(dest[0], tmp);
+    }
+}
+
+void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF64S8(ctx, dest, src);
+}
+
+void EmitConvertF64U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF64S16(ctx, dest, src);
+}
+
+void EmitConvertF64U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF64S32(ctx, dest, src);
+}
+
+void EmitConvertF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    EmitConvertF64S64(ctx, dest, src);
+}
+
+void EmitConvertU16U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    MovGP(ctx, dest[0], src[0]);
+}
+
+void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    MovGP(ctx, dest[0], src[0]);
+}
+
+}
+
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
index 6c086553b..48f0facd4 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
@@ -52,8 +52,8 @@ void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2,
 void EmitBarrier(EmitContext& ctx);
 void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
 void EmitDeviceMemoryBarrier(EmitContext& ctx);
-Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
-void EmitSetUserData(EmitContext& ctx, const IR::Value& offset, const IR::Value& data);
+void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg);
+void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value);
 void EmitGetThreadBitScalarReg(EmitContext& ctx);
 void EmitSetThreadBitScalarReg(EmitContext& ctx);
 void EmitGetScalarRegister(EmitContext& ctx);
@@ -63,30 +63,30 @@ void EmitSetVectorRegister(EmitContext& ctx);
 void EmitSetGotoVariable(EmitContext& ctx);
 void EmitGetGotoVariable(EmitContext& ctx);
 void EmitSetScc(EmitContext& ctx);
-Id EmitReadConst(EmitContext& ctx, IR::Inst* inst);
-Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
-Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
-void EmitStoreBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-void EmitStoreBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
-void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset);
+void EmitReadConstBuffer(EmitContext& ctx);
+void EmitLoadBufferU8(EmitContext& ctx);
+void EmitLoadBufferU16(EmitContext& ctx);
+void EmitLoadBufferU32(EmitContext& ctx);
+void EmitLoadBufferU32x2(EmitContext& ctx);
+void EmitLoadBufferU32x3(EmitContext& ctx);
+void EmitLoadBufferU32x4(EmitContext& ctx);
+void EmitLoadBufferF32(EmitContext& ctx);
+void EmitLoadBufferF32x2(EmitContext& ctx);
+void EmitLoadBufferF32x3(EmitContext& ctx);
+void EmitLoadBufferF32x4(EmitContext& ctx);
+void EmitLoadBufferFormatF32(EmitContext& ctx);
+void EmitStoreBufferU8(EmitContext& ctx);
+void EmitStoreBufferU16(EmitContext& ctx);
+void EmitStoreBufferU32(EmitContext& ctx);
+void EmitStoreBufferU32x2(EmitContext& ctx);
+void EmitStoreBufferU32x3(EmitContext& ctx);
+void EmitStoreBufferU32x4(EmitContext& ctx);
+void EmitStoreBufferF32(EmitContext& ctx);
+void EmitStoreBufferF32x2(EmitContext& ctx);
+void EmitStoreBufferF32x3(EmitContext& ctx);
+void EmitStoreBufferF32x4(EmitContext& ctx);
+void EmitStoreBufferFormatF32(EmitContext& ctx);
 void EmitBufferAtomicIAdd32(EmitContext& ctx);
 void EmitBufferAtomicSMin32(EmitContext& ctx);
 void EmitBufferAtomicUMin32(EmitContext& ctx);
@@ -386,56 +386,56 @@ Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
 Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
 Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
 Id EmitLogicalNot(EmitContext& ctx, Id value);
-Id EmitConvertS16F16(EmitContext& ctx, Id value);
-Id EmitConvertS16F32(EmitContext& ctx, Id value);
-Id EmitConvertS16F64(EmitContext& ctx, Id value);
-Id EmitConvertS32F16(EmitContext& ctx, Id value);
-Id EmitConvertS32F32(EmitContext& ctx, Id value);
-Id EmitConvertS32F64(EmitContext& ctx, Id value);
-Id EmitConvertS64F16(EmitContext& ctx, Id value);
-Id EmitConvertS64F32(EmitContext& ctx, Id value);
-Id EmitConvertS64F64(EmitContext& ctx, Id value);
-Id EmitConvertU16F16(EmitContext& ctx, Id value);
-Id EmitConvertU16F32(EmitContext& ctx, Id value);
-Id EmitConvertU16F64(EmitContext& ctx, Id value);
-Id EmitConvertU32F16(EmitContext& ctx, Id value);
-Id EmitConvertU32F32(EmitContext& ctx, Id value);
-Id EmitConvertU32F64(EmitContext& ctx, Id value);
-Id EmitConvertU64F16(EmitContext& ctx, Id value);
-Id EmitConvertU64F32(EmitContext& ctx, Id value);
-Id EmitConvertU64F64(EmitContext& ctx, Id value);
-Id EmitConvertU64U32(EmitContext& ctx, Id value);
-Id EmitConvertU32U64(EmitContext& ctx, Id value);
-Id EmitConvertF16F32(EmitContext& ctx, Id value);
-Id EmitConvertF32F16(EmitContext& ctx, Id value);
-Id EmitConvertF32F64(EmitContext& ctx, Id value);
-Id EmitConvertF64F32(EmitContext& ctx, Id value);
-Id EmitConvertF16S8(EmitContext& ctx, Id value);
-Id EmitConvertF16S16(EmitContext& ctx, Id value);
-Id EmitConvertF16S32(EmitContext& ctx, Id value);
-Id EmitConvertF16S64(EmitContext& ctx, Id value);
-Id EmitConvertF16U8(EmitContext& ctx, Id value);
-Id EmitConvertF16U16(EmitContext& ctx, Id value);
-Id EmitConvertF16U32(EmitContext& ctx, Id value);
-Id EmitConvertF16U64(EmitContext& ctx, Id value);
-Id EmitConvertF32S8(EmitContext& ctx, Id value);
-Id EmitConvertF32S16(EmitContext& ctx, Id value);
-Id EmitConvertF32S32(EmitContext& ctx, Id value);
-Id EmitConvertF32S64(EmitContext& ctx, Id value);
-Id EmitConvertF32U8(EmitContext& ctx, Id value);
-Id EmitConvertF32U16(EmitContext& ctx, Id value);
-Id EmitConvertF32U32(EmitContext& ctx, Id value);
-Id EmitConvertF32U64(EmitContext& ctx, Id value);
-Id EmitConvertF64S8(EmitContext& ctx, Id value);
-Id EmitConvertF64S16(EmitContext& ctx, Id value);
-Id EmitConvertF64S32(EmitContext& ctx, Id value);
-Id EmitConvertF64S64(EmitContext& ctx, Id value);
-Id EmitConvertF64U8(EmitContext& ctx, Id value);
-Id EmitConvertF64U16(EmitContext& ctx, Id value);
-Id EmitConvertF64U32(EmitContext& ctx, Id value);
-Id EmitConvertF64U64(EmitContext& ctx, Id value);
-Id EmitConvertU16U32(EmitContext& ctx, Id value);
-Id EmitConvertU32U16(EmitContext& ctx, Id value);
+void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU16F64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU64F16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU64U32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF16U16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF16U32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF32U16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF32U32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF64U16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF64U32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertF64U64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU16U32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& src);
 void EmitImageSampleRaw(EmitContext& ctx);
 void EmitImageSampleImplicitLod(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h
index c967f9295..43aebc26a 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h
+++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h
@@ -41,6 +41,8 @@ public:
     [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true);
     [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true);
 
+    [[nodiscard]] const Xbyak::Reg64& UserData() const { return Xbyak::util::r11; }
+
     [[nodiscard]] const Operands& Def(IR::Inst* inst);
     [[nodiscard]] Operands Def(const IR::Value& value);
     [[nodiscard]] std::optional>
diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
index 90375b9d4..7948a41e8 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
+++ b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
@@ -193,12 +193,18 @@ void MovDouble(EmitContext& ctx, const
Xbyak::Operand& dst, const Xbyak::Operand
 void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
     CodeGenerator& c = ctx.Code();
-    if (src.isMEM() && dst.isMEM()) {
-        const Reg64& tmp = ctx.TempGPReg(false);
+    // A memory destination always goes through a temporary register: memory cannot
+    // be the target of movzx, and getReg() is only valid on register operands.
+    Reg tmp = dst.isMEM() ? ctx.TempGPReg(false).changeBit(dst.getBit()) : dst.getReg();
+    if (src.getBit() == dst.getBit()) {
         c.mov(tmp, src);
-        c.mov(dst, tmp);
+    } else if (src.getBit() < dst.getBit()) {
+        if (src.getBit() == 32) {
+            // movzx has no r64, r/m32 form; a plain 32-bit mov already zero-extends.
+            c.mov(tmp.cvt32(), src);
+        } else {
+            c.movzx(tmp, src);
+        }
     } else {
-        c.mov(dst, src);
+        Operand src_tmp = src;
+        src_tmp.setBit(dst.getBit());
+        c.mov(tmp, src_tmp);
+    }
+    if (dst.isMEM()) {
+        c.mov(dst, tmp);
     }
 }
 
From 258a0225c27d679509b5c499af74261eee0ac948 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Sun, 6 Apr 2025 01:25:21 +0200
Subject: [PATCH 39/49] Floating point and fixes

---
 CMakeLists.txt                                |   1 +
 .../asm_x64/emit_x64_bitwise_conversion.cpp   |  62 +-
 .../asm_x64/emit_x64_context_get_set.cpp      |   4 +-
 .../backend/asm_x64/emit_x64_convert.cpp      | 213 +----
 .../asm_x64/emit_x64_floating_point.cpp       | 723 ++++++++++++++++++
 .../backend/asm_x64/emit_x64_instructions.h   | 196 ++---
 .../backend/asm_x64/x64_emit_context.cpp      |  10 +
 .../backend/asm_x64/x64_emit_context.h        |   4 +-
 .../backend/asm_x64/x64_utils.cpp             | 149 ++++
 .../backend/asm_x64/x64_utils.h               |   2 +
 10 files changed, 1026 insertions(+), 338 deletions(-)
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c8596f317..e067040b9 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -905,6 +905,7 @@ if (ARCHITECTURE STREQUAL "x86_64")
             src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp
             src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
             src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp
+            src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp
             src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
             src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
             src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
index def2974e2..14d6d77ac 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
@@ -16,32 +16,24 @@ void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& s
 }
 
 void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    if (dest[0].isMEM() && src[0].isMEM()) {
-        Reg32 tmp = ctx.TempGPReg(false).getReg().cvt32();
-        ctx.Code().mov(tmp, src[0]);
-        ctx.Code().mov(dest[0], tmp);
-    } else if (src[0].isMEM()) {
-        ctx.Code().mov(dest[0], src[0]);
+    if (src[0].isMEM()) {
+        MovGP(ctx, dest[0], src[0]);
     } else if (dest[0].isMEM()) {
         ctx.Code().movd(dest[0].getAddress(), src[0].getReg().cvt128());
     } else {
         ctx.Code().movd(dword[rsp - 4], src[0].getReg().cvt128());
-        ctx.Code().mov(dest[0], dword[rsp - 4]);
+        MovGP(ctx, dest[0], dword[rsp - 4]);
     }
 }
 
 void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& s
-    if (dest[0].isMEM() && src[0].isMEM()) {
-        Reg tmp = ctx.TempGPReg(false);
-        ctx.Code().mov(tmp, src[0]);
-        ctx.Code().mov(dest[0], tmp);
-    } else if (src[0].isMEM()) {
-        ctx.Code().mov(dest[0], src[0]);
+    if (src[0].isMEM()) {
+        MovGP(ctx, dest[0], src[0]);
     } else if (dest[0].isMEM()) {
         ctx.Code().movq(dest[0].getAddress(), 
src[0].getReg().cvt128());
     } else {
         ctx.Code().movq(qword[rsp - 8], src[0].getReg().cvt128());
-        ctx.Code().mov(dest[0], qword[rsp - 8]);
+        MovGP(ctx, dest[0], qword[rsp - 8]);
     }
 }
 
@@ -50,31 +42,23 @@ void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& s
 }
 
 void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    if (dest[0].isMEM() && src[0].isMEM()) {
-        Reg32 tmp = ctx.TempGPReg(false).getReg().cvt32();
-        ctx.Code().mov(tmp, src[0]);
-        ctx.Code().mov(dest[0], tmp);
-    } else if (dest[0].isMEM()) {
-        ctx.Code().mov(dest[0], src[0]);
+    if (dest[0].isMEM()) {
+        MovGP(ctx, dest[0], src[0]);
     } else if (src[0].isMEM()) {
         ctx.Code().movd(dest[0].getReg().cvt128(), src[0].getAddress());
     } else {
-        ctx.Code().mov(dword[rsp - 4], src[0]);
+        MovGP(ctx, dword[rsp - 4], src[0]);
         ctx.Code().movd(dest[0].getReg().cvt128(), dword[rsp - 4]);
     }
 }
 
 void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& s
-    if (dest[0].isMEM() && src[0].isMEM()) {
-        Reg tmp = ctx.TempGPReg(false);
-        ctx.Code().mov(tmp, src[0]);
-        ctx.Code().mov(dest[0], tmp);
-    } else if (dest[0].isMEM()) {
-        ctx.Code().mov(dest[0], src[0]);
+    if (dest[0].isMEM()) {
+        MovGP(ctx, dest[0], src[0]);
     } else if (src[0].isMEM()) {
         ctx.Code().movq(dest[0].getReg().cvt128(), src[0].getAddress());
     } else {
-        ctx.Code().mov(qword[rsp - 8], src[0].getReg());
+        MovGP(ctx, qword[rsp - 8], src[0]);
         ctx.Code().movq(dest[0].getReg().cvt128(), qword[rsp - 8]);
     }
 }
 
@@ -82,35 +66,27 @@ void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& s
 void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
     const bool is_mem = dest[0].isMEM() && (src[0].isMEM() || src[1].isMEM());
     Reg tmp = is_mem ? ctx.TempGPReg(false) : dest[0].getReg();
-    ctx.Code().mov(tmp, src[0]);
+    MovGP(ctx, tmp, src[1]);
     ctx.Code().shl(tmp, 32);
     ctx.Code().or_(tmp, src[0]);
-    if (is_mem) {
-        ctx.Code().mov(dest[0], tmp);
-    }
+    MovGP(ctx, dest[0], tmp);
 }
 
 void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
     Reg src0 = src[0].isMEM() ? ctx.TempGPReg() : src[0].getReg();
-    if (src[0].isMEM()) {
-        ctx.Code().mov(src0, src[0]);
-    }
+    MovGP(ctx, src0, src[0]);
     Reg dest1 = dest[1].isMEM() ? ctx.TempGPReg(false) : dest[1].getReg().changeBit(64);
-    ctx.Code().mov(dest1, src0);
+    MovGP(ctx, dest1, src0);
     ctx.Code().shr(dest1, 32);
-    if (dest[1].isMEM()) {
-        ctx.Code().mov(dest[1], dest1.cvt32());
-    }
-    ctx.Code().mov(dest[0], src0.cvt32());
+    MovGP(ctx, dest[1], dest1);
+    MovGP(ctx, dest[0], src0.cvt32());
 }
 
 void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
     Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, src[0]); ctx.Code().pinsrd(tmp, src[1], 1); - if (dest[0].isMEM()) { - ctx.Code().movss(dest[0].getAddress(), tmp); - } + MovFloat(ctx, dest[0], tmp); } void EmitPackUnorm2x16(EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp index 192570d8f..3669b3708 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp @@ -19,7 +19,7 @@ void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg) void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) { Reg& tmp = ctx.TempGPReg(); - ctx.Code().mov(tmp, offset[0]); + MovGP(ctx, tmp, offset[0]); ctx.Code().shl(tmp, 2); ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]); MovGP(ctx, ptr[tmp], value[0]); @@ -59,7 +59,7 @@ void EmitGetGotoVariable(EmitContext&) { void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset) { Reg& tmp = ctx.TempGPReg(false); - ctx.Code().mov(tmp, base[1]); + MovGP(ctx, tmp, base[1]); ctx.Code().shl(tmp, 32); ctx.Code().or_(tmp, base[0]); if (offset[0].isMEM()) { diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp index 52726342e..f9ca78432 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp @@ -1,7 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include "shader_recompiler/exception.h" #include "shader_recompiler/backend/asm_x64/x64_emit_context.h" #include "shader_recompiler/backend/asm_x64/x64_utils.h" @@ -10,169 +9,27 @@ namespace Shader::Backend::X64 { using namespace Xbyak; using namespace Xbyak::util; -namespace { - -static void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) { - CodeGenerator& c = ctx.Code(); - Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done; - Reg sign = ctx.TempGPReg().cvt32(); - Reg exponent = ctx.TempGPReg().cvt32(); - Reg mantissa = ctx.TempGPReg().cvt32(); - - c.movzx(mantissa, src); - - // Extract sign, exponent, and mantissa - c.mov(sign, mantissa); - c.and_(sign, 0x8000); - c.shl(sign, 16); - c.mov(exponent, mantissa); - c.and_(exponent, 0x7C00); - c.shr(exponent, 10); - c.and_(mantissa, 0x03FF); - - // Check for zero exponent and mantissa - c.test(exponent, exponent); - c.jnz(nonzero_exp); - c.test(mantissa, mantissa); - c.jz(zero_mantissa); - - // Nromalize subnormal number - c.mov(exponent, 1); - c.L(norm_loop); - c.test(mantissa, 0x400); - c.jnz(norm_done); - c.shl(mantissa, 1); - c.dec(exponent); - c.jmp(norm_loop); - c.L(norm_done); - c.and_(mantissa, 0x03FF); - c.jmp(normal); - - // Zero mantissa - c.L(zero_mantissa); - c.and_(mantissa, sign); - c.jmp(done); - - // Non-zero exponent - c.L(nonzero_exp); - c.cmp(exponent, 0x1F); - c.jne(normal); - - // Infinite or NaN - c.shl(mantissa, 13); - c.or_(mantissa, sign); - c.or_(mantissa, 0x7F800000); - c.jmp(done); - - // Normal number - c.L(normal); - c.add(exponent, 112); - c.shl(exponent, 23); - c.shl(mantissa, 13); - c.or_(mantissa, sign); - c.or_(mantissa, exponent); - - c.L(done); - if (dest.isMEM()) { - c.mov(dest, mantissa); - } else { - c.movd(dest.getReg().cvt128(), mantissa); - } 
-} - -static void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) { - CodeGenerator& c = ctx.Code(); - Label zero_exp, underflow, overflow, done; - Reg sign = ctx.TempGPReg().cvt32(); - Reg exponent = ctx.TempGPReg().cvt32(); - Reg mantissa = dest.isMEM() ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32(); - - if (src.isMEM()) { - c.mov(mantissa, src); - } else { - c.movd(mantissa, src.getReg().cvt128()); - } - - // Extract sign, exponent, and mantissa - c.mov(exponent, mantissa); - c.mov(sign, mantissa); - c.and_(exponent, 0x7F800000); - c.and_(mantissa, 0x007FFFFF); - c.shr(exponent, 23); - c.shl(mantissa, 3); - c.shr(sign, 16); - c.and_(sign, 0x8000); - - // Subnormal numbers will be zero - c.test(exponent, exponent); - c.jz(zero_exp); - - // Check for overflow and underflow - c.sub(exponent, 112); - c.cmp(exponent, 0); - c.jle(underflow); - c.cmp(exponent, 0x1F); - c.jge(overflow); - - // Normal number - c.shl(exponent, 10); - c.shr(mantissa, 13); - c.or_(mantissa, exponent); - c.or_(mantissa, sign); - c.jmp(done); - - // Undeflow - c.L(underflow); - c.xor_(mantissa, mantissa); - c.jmp(done); - - // Overflow - c.L(overflow); - c.mov(mantissa, 0x7C00); - c.or_(mantissa, sign); - c.jmp(done); - - // Zero value - c.L(zero_exp); - c.and_(mantissa, sign); - - c.L(done); - if (dest.isMEM()) { - c.mov(dest, mantissa); - } else { - c.and_(mantissa, 0xFFFF); - } -} - -} - void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp_xmm = ctx.TempXmmReg(false); Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); ctx.Code().and_(tmp_reg, 0xFFFF); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp_reg.cvt16()); - } + MovGP(ctx, dest[0], tmp_reg); } void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttss2si(tmp, src[0]); ctx.Code().and_(tmp, 0xFFFF); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp.cvt16()); - } + MovGP(ctx, dest[0], tmp); } void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttsd2si(tmp, src[0]); ctx.Code().and_(tmp, 0xFFFF); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp.cvt16()); - } + MovGP(ctx, dest[0], tmp); } void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -180,25 +37,19 @@ void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& s Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp_reg); - } + MovGP(ctx, dest[0], tmp_reg); } void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttss2si(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovGP(ctx, dest[0], tmp); } void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp = dest[0].isMEM() ? 
ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttsd2si(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovGP(ctx, dest[0], tmp); } void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -206,25 +57,19 @@ void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& s Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg(); EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp_reg); - } + MovGP(ctx, dest[0], tmp_reg); } void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); ctx.Code().cvttss2si(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovGP(ctx, dest[0], tmp); } void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); ctx.Code().cvttsd2si(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovGP(ctx, dest[0], tmp); } void EmitConvertU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -282,17 +127,13 @@ void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& s void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().cvtsd2ss(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovFloat(ctx, dest[0], tmp); } void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().cvtss2sd(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovDouble(ctx, dest[0], tmp); } void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -344,9 +185,7 @@ void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& sr Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp_xmm); - } + MovFloat(ctx, dest[0], tmp_xmm); } void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -354,25 +193,19 @@ void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& s Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp_xmm); - } + MovFloat(ctx, dest[0], tmp_xmm); } void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); ctx.Code().cvtsi2ss(tmp, src[0]); - if (dest[0].isMEM()) { - ctx.Code().mov(dest[0], tmp); - } + MovFloat(ctx, dest[0], tmp); } void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
     ctx.Code().cvtsi2ss(tmp, src[0]);
-    if (dest[0].isMEM()) {
-        ctx.Code().mov(dest[0], tmp);
-    }
+    MovFloat(ctx, dest[0], tmp);
 }
 
 void EmitConvertF32U8(EmitContext& ctx, const Operands& dest, const Operands& src) {
@@ -396,9 +229,7 @@ void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& sr
     Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
     ctx.Code().movsx(tmp_reg, src[0]);
     ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
-    if (dest[0].isMEM()) {
-        ctx.Code().mov(dest[0], tmp_xmm);
-    }
+    MovDouble(ctx, dest[0], tmp_xmm);
 }
 
 void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& s
@@ -406,25 +237,19 @@ void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& s
     Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
     ctx.Code().movsx(tmp_reg, src[0]);
     ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
-    if (dest[0].isMEM()) {
-        ctx.Code().mov(dest[0], tmp_xmm);
-    }
+    MovDouble(ctx, dest[0], tmp_xmm);
 }
 
 void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& s
     Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
     ctx.Code().cvtsi2sd(tmp, src[0]);
-    if (dest[0].isMEM()) {
-        ctx.Code().mov(dest[0], tmp);
-    }
+    MovDouble(ctx, dest[0], tmp);
 }
 
 void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& s
     Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
     ctx.Code().cvtsi2sd(tmp, src[0]);
-    if (dest[0].isMEM()) {
-        ctx.Code().mov(dest[0], tmp);
-    }
+    MovDouble(ctx, dest[0], tmp);
 }
 
 void EmitConvertF64U8(EmitContext& ctx, const Operands& dest, const Operands& sr
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp
new file mode 100644
index 000000000..d209b1e36
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp
@@ -0,0 +1,723 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/backend/asm_x64/emit_x64_instructions.h"
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+#include "shader_recompiler/backend/asm_x64/x64_utils.h"
+
+namespace Shader::Backend::X64 {
+
+using namespace Xbyak;
+using namespace Xbyak::util;
+
+void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt16() : dest[0].getReg().cvt16();
+    MovGP(ctx, tmp, src[0]);
+    ctx.Code().and_(tmp, 0x7FFF);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg reg_tmp = ctx.TempGPReg(false).cvt32();
+    Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().mov(reg_tmp, 0x7FFFFFFF);
+    ctx.Code().movd(xmm_tmp, reg_tmp);
+    ctx.Code().andps(xmm_tmp, src[0]);
+    MovFloat(ctx, dest[0], xmm_tmp);
+}
+
+void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src) {
+    Reg reg_tmp = ctx.TempGPReg(false);
+    Xmm xmm_tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().mov(reg_tmp, 0x7FFFFFFFFFFFFFFF);
+    ctx.Code().movq(xmm_tmp, reg_tmp);
+    ctx.Code().andpd(xmm_tmp, src[0]);
+    MovDouble(ctx, dest[0], xmm_tmp);
+}
+
+void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Xmm tmp1 = ctx.TempXmmReg();
+    Xmm tmp2 = ctx.TempXmmReg();
+    EmitInlineF16ToF32(ctx, tmp1, op1[0]);
+    EmitInlineF16ToF32(ctx, tmp2, op2[0]);
+    ctx.Code().addss(tmp1, tmp2);
+    EmitInlineF32ToF16(ctx, dest[0], tmp1);
+}
+
+void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    MovFloat(ctx, tmp, op1[0]);
+    ctx.Code().addss(tmp, op2[0]);
+    MovFloat(ctx, dest[0], tmp);
+}
+
+void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    MovDouble(ctx, tmp, op1[0]);
+    ctx.Code().addsd(tmp, op2[0]);
+    MovDouble(ctx, dest[0], tmp);
+}
+
+void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    MovFloat(ctx, tmp, op1[0]);
+    ctx.Code().subss(tmp, op2[0]);
+    MovFloat(ctx, dest[0], tmp);
+}
+
+void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
+    Xmm tmp1 = ctx.TempXmmReg();
+    Xmm tmp2 = ctx.TempXmmReg();
+    Xmm tmp3 = ctx.TempXmmReg();
+    EmitInlineF16ToF32(ctx, tmp1, op1[0]);
+    EmitInlineF16ToF32(ctx, tmp2, op2[0]);
+    EmitInlineF16ToF32(ctx, tmp3, op3[0]);
+    ctx.Code().vfmadd132ss(tmp3, tmp1, tmp2);
+    EmitInlineF32ToF16(ctx, dest[0], tmp3);
+}
+
+void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
+    Xmm tmp1 = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
+    Xmm tmp2 = op1[0].isMEM() ? ctx.TempXmmReg() : op1[0].getReg().cvt128();
+    Xmm tmp3 = op2[0].isMEM() ? ctx.TempXmmReg() : op2[0].getReg().cvt128();
+    MovFloat(ctx, tmp1, op3[0]);
+    MovFloat(ctx, tmp2, op1[0]);
+    MovFloat(ctx, tmp3, op2[0]);
+    ctx.Code().vfmadd132ss(tmp3, tmp1, tmp2);
+    MovFloat(ctx, dest[0], tmp3);
+}
+
+void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
+    Xmm tmp1 = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
+    Xmm tmp2 = op1[0].isMEM() ? ctx.TempXmmReg() : op1[0].getReg().cvt128();
+    Xmm tmp3 = op2[0].isMEM() ? ctx.TempXmmReg() : op2[0].getReg().cvt128();
+    MovDouble(ctx, tmp1, op3[0]);
+    MovDouble(ctx, tmp2, op1[0]);
+    MovDouble(ctx, tmp3, op2[0]);
+    ctx.Code().vfmadd132sd(tmp3, tmp1, tmp2);
+    MovDouble(ctx, dest[0], tmp3);
+}
+
+void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) {
+    if (is_legacy) {
+        Xmm tmp1 = ctx.TempXmmReg();
+        Xmm tmp2 = ctx.TempXmmReg();
+        MovFloat(ctx, tmp1, op1[0]);
+        MovFloat(ctx, tmp2, op1[0]);
+        ctx.Code().maxss(tmp2, op2[0]);
+        ctx.Code().cmpunordss(tmp1, tmp1);
+        ctx.Code().andps(tmp1, op2[0]);
+        ctx.Code().orps(tmp2, tmp1);
+        MovFloat(ctx, dest[0], tmp2);
+    } else {
+        Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().maxss(tmp, op2[0]); + MovFloat(ctx, dest[0], tmp); + } +} + +void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovDouble(ctx, tmp, op1[0]); + ctx.Code().maxsd(tmp, op2[0]); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) { + if (is_legacy) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + MovFloat(ctx, tmp1, op1[0]); + MovFloat(ctx, tmp2, op1[0]); + ctx.Code().minss(tmp2, op2[0]); + ctx.Code().cmpunordss(tmp1, tmp1); + ctx.Code().andps(tmp1, op2[0]); + ctx.Code().orps(tmp2, tmp1); + MovFloat(ctx, dest[0], tmp2); + } else { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().minss(tmp, op2[0]); + MovFloat(ctx, dest[0], tmp); + } +} + +void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovDouble(ctx, tmp, op1[0]); + ctx.Code().minsd(tmp, op2[0]); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, op1[0]); + EmitInlineF16ToF32(ctx, tmp2, op2[0]); + ctx.Code().mulss(tmp1, tmp2); + EmitInlineF32ToF16(ctx, dest[0], tmp1); +} + +void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().mulss(tmp, op2[0]); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovDouble(ctx, tmp, op1[0]); + ctx.Code().mulsd(tmp, op2[0]); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().divss(tmp, op2[0]); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovDouble(ctx, tmp, op1[0]); + ctx.Code().divsd(tmp, op2[0]); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt16() : dest[0].getReg().cvt16(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().xor_(tmp, 0x8000); + MovGP(ctx, dest[0], tmp); +} + +void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg(false).cvt32(); + ctx.Code().mov(tmp_reg, 0x80000000); + ctx.Code().movd(tmp_xmm, tmp_reg); + ctx.Code().xorps(tmp_xmm, op1[0]); + MovFloat(ctx, dest[0], tmp_xmm); +} + +void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp_xmm = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    Reg tmp_reg = ctx.TempGPReg(false);
+    ctx.Code().mov(tmp_reg, 0x8000000000000000);
+    ctx.Code().movq(tmp_xmm, tmp_reg);
+    ctx.Code().xorpd(tmp_xmm, op1[0]);
+    MovDouble(ctx, dest[0], tmp_xmm);
+}
+
+void EmitFPSin(EmitContext& ctx) {
+    throw NotImplementedException("FPSin");
+}
+
+void EmitFPCos(EmitContext& ctx) {
+    throw NotImplementedException("FPCos");
+}
+
+void EmitFPExp2(EmitContext& ctx) {
+    throw NotImplementedException("FPExp2");
+}
+
+void EmitFPLdexp(EmitContext& ctx) {
+    throw NotImplementedException("FPLdexp");
+}
+
+void EmitFPLog2(EmitContext& ctx) {
+    throw NotImplementedException("FPLog2");
+}
+
+void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().rcpss(tmp, op1[0]);
+    MovFloat(ctx, dest[0], tmp);
+}
+
+void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
+    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    Reg tmp_reg = ctx.TempGPReg(false);
+    ctx.Code().mov(tmp_reg, 1);
+    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
+    ctx.Code().divsd(tmp_xmm, op1[0]);
+    MovDouble(ctx, dest[0], tmp_xmm);
+}
+
+void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().rsqrtss(tmp, op1[0]);
+    MovFloat(ctx, dest[0], tmp);
+}
+
+void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
+    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    Reg tmp_reg = ctx.TempGPReg(false);
+    ctx.Code().mov(tmp_reg, 1);
+    ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
+    ctx.Code().divsd(tmp_xmm, op1[0]);
+    ctx.Code().sqrtsd(tmp_xmm, tmp_xmm);
+    MovDouble(ctx, dest[0], tmp_xmm);
+}
+
+void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& op1) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    ctx.Code().sqrtss(tmp, op1[0]);
+    MovFloat(ctx, dest[0], tmp);
+}
+
+void EmitFPSaturate16(EmitContext& ctx) {
+    throw NotImplementedException("FPSaturate16");
+}
+
+void EmitFPSaturate32(EmitContext& ctx) {
+    throw NotImplementedException("FPSaturate32");
+}
+
+void EmitFPSaturate64(EmitContext& ctx) {
+    throw NotImplementedException("FPSaturate64");
+}
+
+void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
+    Xmm tmp1 = ctx.TempXmmReg();
+    Xmm tmp2 = ctx.TempXmmReg();
+    Xmm tmp3 = ctx.TempXmmReg();
+    EmitInlineF16ToF32(ctx, tmp1, op[0]);
+    EmitInlineF16ToF32(ctx, tmp2, min[0]);
+    EmitInlineF16ToF32(ctx, tmp3, max[0]);
+    ctx.Code().maxss(tmp1, tmp2);
+    ctx.Code().minss(tmp1, tmp3);
+    EmitInlineF32ToF16(ctx, dest[0], tmp1);
+}
+
+void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
+    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    MovFloat(ctx, tmp, op[0]);
+    ctx.Code().maxss(tmp, min[0]);
+    ctx.Code().minss(tmp, max[0]);
+    MovFloat(ctx, dest[0], tmp);
+}
+
+void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) {
+    Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovDouble(ctx, tmp, op[0]); + ctx.Code().maxsd(tmp, min[0]); + ctx.Code().minsd(tmp, max[0]); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp, op1[0]); + ctx.Code().roundss(tmp, tmp, 0x00); + EmitInlineF32ToF16(ctx, dest[0], tmp); +} + +void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().roundss(tmp, op1[0], 0x00); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().roundsd(tmp, op1[0], 0x00); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp, op1[0]); + ctx.Code().roundss(tmp, tmp, 0x01); + EmitInlineF32ToF16(ctx, dest[0], tmp); +} + +void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().roundss(tmp, op1[0], 0x01); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().roundsd(tmp, op1[0], 0x01); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp, op1[0]); + ctx.Code().roundss(tmp, tmp, 0x02); + EmitInlineF32ToF16(ctx, dest[0], tmp); +} + +void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().roundss(tmp, op1[0], 0x02); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1) { + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + ctx.Code().roundsd(tmp, op1[0], 0x02); + MovDouble(ctx, dest[0], tmp); +} + +void EmitFPTrunc16(EmitContext& ctx) { + throw NotImplementedException("FPTrunc16"); +} + +void EmitFPTrunc32(EmitContext& ctx) { + throw NotImplementedException("FPTrunc32"); +} + +void EmitFPTrunc64(EmitContext& ctx) { + throw NotImplementedException("FPTrunc64"); +} + +void EmitFPFract32(EmitContext& ctx) { + throw NotImplementedException("FPFract32"); +} + +void EmitFPFract64(EmitContext& ctx) { + throw NotImplementedException("FPFract64"); +} + +void EmitFPFrexpSig32(EmitContext& ctx) { + throw NotImplementedException("FPFrexpSig32"); +} + +void EmitFPFrexpSig64(EmitContext& ctx) { + throw NotImplementedException("FPFrexpSig64"); +} + +void EmitFPFrexpExp32(EmitContext& ctx) { + throw NotImplementedException("FPFrexpExp32"); +} + +void EmitFPFrexpExp64(EmitContext& ctx) { + throw NotImplementedException("FPFrexpExp64"); +} + +void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordEqual16(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordEqual32(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordEqual64(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, lhs[0]); + EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + ctx.Code().ucomiss(tmp1, tmp2); + ctx.Code().sete(dest[0]); +} + +void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovFloat(ctx, tmp, lhs[0]); + ctx.Code().ucomiss(tmp, rhs[0]); + ctx.Code().sete(dest[0]); +} + +void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovDouble(ctx, tmp, lhs[0]); + ctx.Code().ucomisd(tmp, rhs[0]); + ctx.Code().sete(dest[0]); +} + +void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordNotEqual16(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + Xmm tmp = lhs[0].isMEM() ? 
ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
+    MovFloat(ctx, tmp, lhs[0]);
+    ctx.Code().ucomiss(tmp, rhs[0]);
+    ctx.Code().setne(dest[0]);
+    ctx.Code().jnp(not_nan);
+    ctx.Code().mov(dest[0], 0);
+    ctx.Code().L(not_nan);
+}
+
+void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Label not_nan;
+    EmitFPUnordNotEqual64(ctx, dest, lhs, rhs);
+    ctx.Code().jnp(not_nan);
+    ctx.Code().mov(dest[0], 0);
+    ctx.Code().L(not_nan);
+}
+
+void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Xmm tmp1 = ctx.TempXmmReg();
+    Xmm tmp2 = ctx.TempXmmReg();
+    EmitInlineF16ToF32(ctx, tmp1, lhs[0]);
+    EmitInlineF16ToF32(ctx, tmp2, rhs[0]);
+    ctx.Code().ucomiss(tmp1, tmp2);
+    ctx.Code().setne(dest[0]);
+}
+
+void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
+    MovFloat(ctx, tmp, lhs[0]);
+    ctx.Code().ucomiss(tmp, rhs[0]);
+    ctx.Code().setne(dest[0]);
+}
+
+void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
+    MovDouble(ctx, tmp, lhs[0]);
+    ctx.Code().ucomisd(tmp, rhs[0]);
+    ctx.Code().setne(dest[0]);
+}
+
+void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Label not_nan;
+    EmitFPUnordLessThan16(ctx, dest, lhs, rhs);
+    ctx.Code().jnp(not_nan);
+    ctx.Code().mov(dest[0], 0);
+    ctx.Code().L(not_nan);
+}
+
+void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Label not_nan;
+    EmitFPUnordLessThan32(ctx, dest, lhs, rhs);
+    ctx.Code().jnp(not_nan);
+    ctx.Code().mov(dest[0], 0);
+    ctx.Code().L(not_nan);
+}
+
+void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Label not_nan;
+    EmitFPUnordLessThan64(ctx, dest, lhs, rhs);
+    ctx.Code().jnp(not_nan);
+    ctx.Code().mov(dest[0], 0);
+    ctx.Code().L(not_nan);
+}
+
+void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Xmm tmp1 = ctx.TempXmmReg();
+    Xmm tmp2 = ctx.TempXmmReg();
+    EmitInlineF16ToF32(ctx, tmp1, lhs[0]);
+    EmitInlineF16ToF32(ctx, tmp2, rhs[0]);
+    ctx.Code().ucomiss(tmp1, tmp2);
+    ctx.Code().setb(dest[0]);
+}
+
+void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
+    MovFloat(ctx, tmp, lhs[0]);
+    ctx.Code().ucomiss(tmp, rhs[0]);
+    ctx.Code().setb(dest[0]);
+}
+
+void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Xmm tmp = lhs[0].isMEM() ? 
ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovDouble(ctx, tmp, lhs[0]); + ctx.Code().ucomisd(tmp, rhs[0]); + ctx.Code().setb(dest[0]); +} + +void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordGreaterThan16(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordGreaterThan32(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordGreaterThan64(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, lhs[0]); + EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + ctx.Code().ucomiss(tmp1, tmp2); + ctx.Code().seta(dest[0]); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovFloat(ctx, tmp, lhs[0]); + ctx.Code().ucomiss(tmp, rhs[0]); + ctx.Code().seta(dest[0]); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovDouble(ctx, tmp, lhs[0]); + ctx.Code().ucomisd(tmp, rhs[0]); + ctx.Code().seta(dest[0]); +} + +void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordLessThanEqual16(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordLessThanEqual32(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Label not_nan; + EmitFPUnordLessThanEqual64(ctx, dest, lhs, rhs); + ctx.Code().jnp(not_nan); + ctx.Code().mov(dest[0], 0); + ctx.Code().L(not_nan); +} + +void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp1 = ctx.TempXmmReg(); + Xmm tmp2 = ctx.TempXmmReg(); + EmitInlineF16ToF32(ctx, tmp1, lhs[0]); + EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + ctx.Code().ucomiss(tmp1, tmp2); + ctx.Code().setbe(dest[0]); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + MovFloat(ctx, tmp, lhs[0]); + ctx.Code().ucomiss(tmp, rhs[0]); + ctx.Code().setbe(dest[0]); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { + Xmm tmp = lhs[0].isMEM() ? 
ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
+    MovDouble(ctx, tmp, lhs[0]);
+    ctx.Code().ucomisd(tmp, rhs[0]);
+    ctx.Code().setbe(dest[0]);
+}
+
+void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Label not_nan;
+    EmitFPUnordGreaterThanEqual16(ctx, dest, lhs, rhs);
+    ctx.Code().jnp(not_nan);
+    ctx.Code().mov(dest[0], 0);
+    ctx.Code().L(not_nan);
+}
+
+void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Label not_nan;
+    EmitFPUnordGreaterThanEqual32(ctx, dest, lhs, rhs);
+    ctx.Code().jnp(not_nan);
+    ctx.Code().mov(dest[0], 0);
+    ctx.Code().L(not_nan);
+}
+
+void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Label not_nan;
+    EmitFPUnordGreaterThanEqual64(ctx, dest, lhs, rhs);
+    ctx.Code().jnp(not_nan);
+    ctx.Code().mov(dest[0], 0);
+    ctx.Code().L(not_nan);
+}
+
+void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Xmm tmp1 = ctx.TempXmmReg();
+    Xmm tmp2 = ctx.TempXmmReg();
+    EmitInlineF16ToF32(ctx, tmp1, lhs[0]);
+    EmitInlineF16ToF32(ctx, tmp2, rhs[0]);
+    ctx.Code().ucomiss(tmp1, tmp2);
+    ctx.Code().setae(dest[0]);
+}
+
+void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
+    MovFloat(ctx, tmp, lhs[0]);
+    ctx.Code().ucomiss(tmp, rhs[0]);
+    ctx.Code().setae(dest[0]);
+}
+
+void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128();
+    MovDouble(ctx, tmp, lhs[0]);
+    ctx.Code().ucomisd(tmp, rhs[0]);
+    ctx.Code().setae(dest[0]);
+}
+
+void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& op) {
+    Xmm tmp = ctx.TempXmmReg();
+    EmitInlineF16ToF32(ctx, tmp, op[0]);
+    ctx.Code().ucomiss(tmp, tmp);
+    ctx.Code().setp(dest[0]);
+}
+
+void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& op) {
+    Xmm tmp = op[0].isMEM() ? ctx.TempXmmReg(false) : op[0].getReg().cvt128();
+    MovFloat(ctx, tmp, op[0]);
+    ctx.Code().ucomiss(tmp, tmp);
+    ctx.Code().setp(dest[0]);
+}
+
+void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& op) {
+    Xmm tmp = op[0].isMEM() ? 
ctx.TempXmmReg(false) : op[0].getReg().cvt128();
+    MovDouble(ctx, tmp, op[0]);
+    ctx.Code().ucomisd(tmp, tmp);
+    ctx.Code().setp(dest[0]);
+}
+
+void EmitFPIsInf32(EmitContext& ctx) {
+    throw NotImplementedException("FPIsInf32");
+}
+
+void EmitFPIsInf64(EmitContext& ctx) {
+    throw NotImplementedException("FPIsInf64");
+}
+
+void EmitFPCmpClass32(EmitContext&) {
+    UNREACHABLE();
+}
+}
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
index 48f0facd4..9b34ff40b 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
@@ -157,7 +157,7 @@ void EmitCompositeInsertF16x4(EmitContext& ctx, const Operands& dest, const Oper
 void EmitCompositeShuffleF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2);
 void EmitCompositeShuffleF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3);
 void EmitCompositeShuffleF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4);
-void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
+void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
 void EmitCompositeConstructF32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3);
 void EmitCompositeConstructF32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4);
 void EmitCompositeConstructF32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2);
@@ -227,103 +227,103 @@ void EmitPackUint2_10_10_10(EmitContext& ctx);
 void EmitUnpackUint2_10_10_10(EmitContext& ctx);
 void EmitPackSint2_10_10_10(EmitContext& ctx);
 void EmitUnpackSint2_10_10_10(EmitContext& ctx);
-Id EmitFPAbs16(EmitContext& ctx, Id value);
-Id EmitFPAbs32(EmitContext& ctx, Id value);
-Id EmitFPAbs64(EmitContext& ctx, Id value);
-Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
-Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
-Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
-Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
-Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
-Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
-Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
-Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
-Id EmitFPNeg16(EmitContext& ctx, Id value);
-Id EmitFPNeg32(EmitContext& ctx, Id value);
-Id EmitFPNeg64(EmitContext& ctx, Id value);
-Id EmitFPSin(EmitContext& ctx, Id value);
-Id EmitFPCos(EmitContext& ctx, Id value); 
-Id EmitFPExp2(EmitContext& ctx, Id value); -Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp); -Id EmitFPLog2(EmitContext& ctx, Id value); -Id EmitFPRecip32(EmitContext& ctx, Id value); -Id EmitFPRecip64(EmitContext& ctx, Id value); -Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); -Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); -Id EmitFPSqrt(EmitContext& ctx, Id value); -Id EmitFPSaturate16(EmitContext& ctx, Id value); -Id EmitFPSaturate32(EmitContext& ctx, Id value); -Id EmitFPSaturate64(EmitContext& ctx, Id value); -Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPRoundEven16(EmitContext& ctx, Id value); -Id EmitFPRoundEven32(EmitContext& ctx, Id value); -Id EmitFPRoundEven64(EmitContext& ctx, Id value); -Id EmitFPFloor16(EmitContext& ctx, Id value); -Id EmitFPFloor32(EmitContext& ctx, Id value); -Id EmitFPFloor64(EmitContext& ctx, Id value); -Id EmitFPCeil16(EmitContext& ctx, Id value); -Id EmitFPCeil32(EmitContext& ctx, Id value); -Id EmitFPCeil64(EmitContext& ctx, Id value); -Id EmitFPTrunc16(EmitContext& ctx, Id value); -Id EmitFPTrunc32(EmitContext& ctx, Id value); -Id EmitFPTrunc64(EmitContext& ctx, Id value); -Id EmitFPFract32(EmitContext& ctx, Id value); -Id EmitFPFract64(EmitContext& ctx, Id value); -Id EmitFPFrexpSig32(EmitContext& ctx, Id value); -Id EmitFPFrexpSig64(EmitContext& ctx, Id value); -Id EmitFPFrexpExp32(EmitContext& ctx, Id value); -Id EmitFPFrexpExp64(EmitContext& ctx, Id value); -Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id 
EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPIsNan16(EmitContext& ctx, Id value); -Id EmitFPIsNan32(EmitContext& ctx, Id value); -Id EmitFPIsNan64(EmitContext& ctx, Id value); -Id EmitFPIsInf32(EmitContext& ctx, Id value); -Id EmitFPIsInf64(EmitContext& ctx, Id value); +void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); +void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); +void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); +void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false); +void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false); +void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPSin(EmitContext& ctx); +void EmitFPCos(EmitContext& ctx); +void EmitFPExp2(EmitContext& ctx); +void EmitFPLdexp(EmitContext& ctx); +void EmitFPLog2(EmitContext& ctx); +void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPSaturate16(EmitContext& ctx); +void EmitFPSaturate32(EmitContext& ctx); +void EmitFPSaturate64(EmitContext& ctx); +void EmitFPClamp16(EmitContext& ctx, const Operands& 
dest, const Operands& op, const Operands& min, const Operands& max); +void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max); +void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max); +void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1); +void EmitFPTrunc16(EmitContext& ctx); +void EmitFPTrunc32(EmitContext& ctx); +void EmitFPTrunc64(EmitContext& ctx); +void EmitFPFract32(EmitContext& ctx); +void EmitFPFract64(EmitContext& ctx); +void EmitFPFrexpSig32(EmitContext& ctx); +void EmitFPFrexpSig64(EmitContext& ctx); +void EmitFPFrexpExp32(EmitContext& ctx); +void EmitFPFrexpExp64(EmitContext& ctx); +void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, const 
Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& src); +void EmitFPIsInf32(EmitContext& ctx); +void EmitFPIsInf64(EmitContext& ctx); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitIAdd64(EmitContext& ctx, Id a, Id b); Id EmitIAddCary32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp index 1b706eeeb..5dd7e0b6c 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp @@ -48,6 +48,16 @@ Xmm& EmitContext::TempXmmReg(bool reserve) { return reg; } +void EmitContext::PopTempGPReg() { + ASSERT(temp_gp_reg_index > 0); + temp_gp_reg_index--; +} + +void EmitContext::PopTempXmmReg() { + ASSERT(temp_xmm_reg_index > 0); + temp_xmm_reg_index--; +} + const Operands& EmitContext::Def(IR::Inst* inst) { return inst_to_operands.at(inst); } diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h index 43aebc26a..eab5bad70 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h @@ -40,7 +40,9 @@ public: [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true); [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true); - + void PopTempGPReg(); + void PopTempXmmReg(); + [[nodiscard]] const 
Xbyak::Reg64& UserData() const {return Xbyak::util::r11;}
 
     [[nodiscard]] const Operands& Def(IR::Inst* inst);
 
diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
index 7948a41e8..3d327569b 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
+++ b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
@@ -159,6 +159,9 @@ Reg ResizeRegToType(const Reg& reg, IR::Type type) {
 
 void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
     CodeGenerator& c = ctx.Code();
+    if (src == dst) {
+        return;
+    }
     if (src.isMEM() && dst.isMEM()) {
         Reg tmp = ctx.TempGPReg(false).cvt32();
         c.mov(tmp, src);
@@ -176,6 +179,9 @@ void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand&
 
 void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
     CodeGenerator& c = ctx.Code();
+    if (src == dst) {
+        return;
+    }
     if (src.isMEM() && dst.isMEM()) {
         const Reg64& tmp = ctx.TempGPReg(false);
         c.mov(tmp, src);
@@ -193,6 +199,9 @@ void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand
 
 void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) {
     CodeGenerator& c = ctx.Code();
+    if (src == dst) {
+        return;
+    }
     Reg tmp = (src.isMEM() && dst.isMEM()) ? ctx.TempGPReg(false).changeBit(dst.getBit()) : dst.getReg();
     if (src.getBit() == dst.getBit()) {
         c.mov(tmp, src);
@@ -288,4 +297,144 @@ void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) {
     }
 }
 
+void EmitInlineF16ToF32(EmitContext& ctx, const Operand& dest, const Operand& src) {
+    CodeGenerator& c = ctx.Code();
+    Label nonzero_exp, zero_mantissa, norm_loop, norm_done, normal, done;
+    Reg sign = ctx.TempGPReg().cvt32();
+    Reg exponent = ctx.TempGPReg().cvt32();
+    Reg mantissa = ctx.TempGPReg().cvt32();
+
+    c.movzx(mantissa, src);
+
+    // Extract sign, exponent, and mantissa
+    c.mov(sign, mantissa);
+    c.and_(sign, 0x8000);
+    c.shl(sign, 16);
+    c.mov(exponent, mantissa);
+    c.and_(exponent, 0x7C00);
+    c.shr(exponent, 10);
+    c.and_(mantissa, 0x03FF);
+
+    // Check for zero exponent and mantissa
+    c.test(exponent, exponent);
+    c.jnz(nonzero_exp);
+    c.test(mantissa, mantissa);
+    c.jz(zero_mantissa);
+
+    // Normalize subnormal number
+    c.mov(exponent, 1);
+    c.L(norm_loop);
+    c.test(mantissa, 0x400);
+    c.jnz(norm_done);
+    c.shl(mantissa, 1);
+    c.dec(exponent);
+    c.jmp(norm_loop);
+    c.L(norm_done);
+    c.and_(mantissa, 0x03FF);
+    c.jmp(normal);
+
+    // Zero mantissa
+    c.L(zero_mantissa);
+    c.and_(mantissa, sign);
+    c.jmp(done);
+
+    // Non-zero exponent
+    c.L(nonzero_exp);
+    c.cmp(exponent, 0x1F);
+    c.jne(normal);
+
+    // Infinity or NaN
+    c.shl(mantissa, 13);
+    c.or_(mantissa, sign);
+    c.or_(mantissa, 0x7F800000);
+    c.jmp(done);
+
+    // Normal number
+    c.L(normal);
+    c.add(exponent, 112);
+    c.shl(exponent, 23);
+    c.shl(mantissa, 13);
+    c.or_(mantissa, sign);
+    c.or_(mantissa, exponent);
+
+    c.L(done);
+    if (dest.isMEM()) {
+        c.mov(dest, mantissa);
+    } else {
+        c.movd(dest.getReg().cvt128(), mantissa);
+    }
+
+    ctx.PopTempGPReg();
+    ctx.PopTempGPReg();
+    ctx.PopTempGPReg();
+}
+
+void EmitInlineF32ToF16(EmitContext& ctx, const Operand& dest, const Operand& src) {
+    CodeGenerator& c = ctx.Code();
+    Label zero_exp, underflow, overflow, done;
+    Reg sign = ctx.TempGPReg().cvt32();
+    Reg exponent = ctx.TempGPReg().cvt32();
+    Reg mantissa = dest.isMEM() ? ctx.TempGPReg().cvt32() : dest.getReg().cvt32();
+
+    if (src.isMEM()) {
+        c.mov(mantissa, src);
+    } else {
+        c.movd(mantissa, src.getReg().cvt128());
+    }
+
+    // Extract sign, exponent, and mantissa
+    c.mov(exponent, mantissa);
+    c.mov(sign, mantissa);
+    c.and_(exponent, 0x7F800000);
+    c.and_(mantissa, 0x007FFFFF);
+    c.shr(exponent, 23);
+    c.shl(mantissa, 3);
+    c.shr(sign, 16);
+    c.and_(sign, 0x8000);
+
+    // Subnormal numbers will be zero
+    c.test(exponent, exponent);
+    c.jz(zero_exp);
+
+    // Check for overflow and underflow
+    c.sub(exponent, 112);
+    c.cmp(exponent, 0);
+    c.jle(underflow);
+    c.cmp(exponent, 0x1F);
+    c.jge(overflow);
+
+    // Normal number
+    c.shl(exponent, 10);
+    c.shr(mantissa, 13);
+    c.or_(mantissa, exponent);
+    c.or_(mantissa, sign);
+    c.jmp(done);
+
+    // Underflow
+    c.L(underflow);
+    c.xor_(mantissa, mantissa);
+    c.jmp(done);
+
+    // Overflow
+    c.L(overflow);
+    c.mov(mantissa, 0x7C00);
+    c.or_(mantissa, sign);
+    c.jmp(done);
+
+    // Zero value
+    c.L(zero_exp);
+    c.and_(mantissa, sign);
+
+    c.L(done);
+    if (dest.isMEM()) {
+        c.mov(dest, mantissa);
+    } else {
+        c.and_(mantissa, 0xFFFF);
+    }
+
+    ctx.PopTempGPReg();
+    ctx.PopTempGPReg();
+    ctx.PopTempGPReg();
+}
+
 } // namespace Shader::Backend::X64
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.h b/src/shader_recompiler/backend/asm_x64/x64_utils.h
index 2d665653d..c22dbfc77 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_utils.h
+++ b/src/shader_recompiler/backend/asm_x64/x64_utils.h
@@ -19,5 +19,7 @@ void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand&
 void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
 void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src);
 void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src);
+void EmitInlineF16ToF32(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
+void EmitInlineF32ToF16(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src);
 
 } // namespace Shader::Backend::X64
\ No newline at end of file
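For reference, EmitInlineF16ToF32 above is the standard IEEE-754 binary16 to binary32 widening done entirely in integer registers: split sign/exponent/mantissa, re-bias the exponent from 15 to 127 (hence the +112), and normalize subnormal inputs by shifting the mantissa up until the implicit bit appears. A minimal scalar C++ sketch of the same algorithm, illustrative only and not part of the patch (the name float16_to_float32 is hypothetical):

#include <cstdint>
#include <cstring>

// Widen an IEEE-754 binary16 to binary32, mirroring the emitted assembly:
// same field split, same subnormal normalization loop, same +112 re-bias.
inline float float16_to_float32(uint16_t h) {
    uint32_t sign = static_cast<uint32_t>(h & 0x8000) << 16;
    uint32_t exponent = (h & 0x7C00) >> 10;
    uint32_t mantissa = h & 0x03FF;
    uint32_t bits;
    if (exponent == 0x1F) {
        // Infinity or NaN: all-ones exponent, mantissa moved to the top bits.
        bits = sign | 0x7F800000 | (mantissa << 13);
    } else if (exponent != 0) {
        // Normal number: re-bias the exponent (127 - 15 = 112).
        bits = sign | ((exponent + 112) << 23) | (mantissa << 13);
    } else if (mantissa != 0) {
        // Subnormal: shift until the implicit bit (0x400) appears; the
        // unsigned wraparound of `exponent` matches the asm's dec loop.
        exponent = 1;
        while ((mantissa & 0x400) == 0) {
            mantissa <<= 1;
            --exponent;
        }
        bits = sign | ((exponent + 112) << 23) | ((mantissa & 0x03FF) << 13);
    } else {
        bits = sign; // signed zero
    }
    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}

EmitInlineF32ToF16 is the same re-biasing in reverse, flushing subnormal results to zero and clamping out-of-range exponents to infinity; the temporaries reserved with TempGPReg() are released afterwards through the matching PopTempGPReg() calls added in this patch.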
From a578c7db626ac12af9fa5feab743298c4af43857 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Sun, 6 Apr 2025 20:30:38 +0200
Subject: [PATCH 40/49] Finish emits

---
 CMakeLists.txt                                |   6 +
 .../backend/asm_x64/emit_x64.cpp              |  29 +-
 .../backend/asm_x64/emit_x64_barrier.cpp      |   2 +-
 .../asm_x64/emit_x64_bitwise_conversion.cpp   |   8 +-
 .../backend/asm_x64/emit_x64_composite.cpp    |   2 +-
 .../asm_x64/emit_x64_context_get_set.cpp      |   4 +-
 .../backend/asm_x64/emit_x64_convert.cpp      |  57 +-
 .../asm_x64/emit_x64_floating_point.cpp       | 106 ++--
 .../backend/asm_x64/emit_x64_image.cpp        |   2 +-
 .../backend/asm_x64/emit_x64_instructions.h   | 207 ++++----
 .../backend/asm_x64/emit_x64_integer.cpp      | 502 ++++++++++++++++++
 .../backend/asm_x64/emit_x64_logical.cpp      |  40 ++
 .../backend/asm_x64/emit_x64_select.cpp       |  71 +++
 .../backend/asm_x64/emit_x64_special.cpp      |  55 ++
 .../backend/asm_x64/emit_x64_undefined.cpp    |  28 +
 .../backend/asm_x64/emit_x64_warp.cpp         |  32 ++
 .../backend/asm_x64/x64_emit_context.cpp      |  10 +-
 .../backend/asm_x64/x64_emit_context.h        |  15 +-
 .../compute_value/do_integer_operations.cpp   |   3 +-
 .../ir/compute_value/imm_value.cpp            |  56 ++
 .../ir/compute_value/imm_value.h              |   3 +
 21 files changed, 1029 insertions(+), 209 deletions(-)
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp
 create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e067040b9..be2482698 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -908,7 +908,13 @@ if (ARCHITECTURE STREQUAL "x86_64")
             src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp
             src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp
             src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
+            src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp
+            src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
+            src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp
             src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
+            src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp
+            src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp
+            src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp
             src/shader_recompiler/backend/asm_x64/emit_x64.cpp
             src/shader_recompiler/backend/asm_x64/emit_x64.h
             src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
index e128216fc..9464bd36b 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
@@ -38,7 +38,7 @@ static void EmitCondition(EmitContext& ctx, const IR::Inst* ref, Label& label, b
 }
 
 template <typename ArgType>
-ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
+std::remove_reference_t<ArgType> Arg(EmitContext& ctx, const IR::Value& arg) {
     if constexpr (std::is_same_v<ArgType, const Operands&>) {
         return ctx.Def(arg);
     } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
@@ -114,9 +114,24 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) {
     UNREACHABLE_MSG("Invalid opcode {}", inst->GetOpcode());
 }
 
+static bool IsLastInst(const IR::AbstractSyntaxList& list, IR::AbstractSyntaxList::const_iterator it) {
+    for (; it != list.end(); ++it) {
+        switch (it->type) {
+        case IR::AbstractSyntaxNode::Type::Return:
+        case IR::AbstractSyntaxNode::Type::Loop:
+        case IR::AbstractSyntaxNode::Type::EndIf:
+            continue;
+        default:
+            return false;
+        }
+    }
+    return true;
+}
+
 void Traverse(EmitContext& ctx, const IR::Program& program) {
     CodeGenerator& c = ctx.Code();
-    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
+    for (auto it = program.syntax_list.begin(); it != program.syntax_list.end(); ++it) {
+        const IR::AbstractSyntaxNode& node = *it;
         ctx.ResetTempRegs();
         switch (node.type) {
         case IR::AbstractSyntaxNode::Type::Block: {
@@ -130,6 +145,9 @@ void Traverse(EmitContext& ctx, const IR::Program& program) {
                 MovValue(ctx, ctx.Def(phi), value);
             }
         }
+            if (ctx.EndFlag() && IsLastInst(program.syntax_list, it)) {
+                c.jmp(ctx.EndLabel());
+            }
             break;
         }
         case IR::AbstractSyntaxNode::Type::If: {
@@ -148,17 +166,14 @@ void Traverse(EmitContext& ctx, const IR::Program& program) {
             IR::Inst* ref = node.data.break_node.cond.InstRecursive();
             Label& merge = ctx.BlockLabel(node.data.break_node.merge);
             EmitCondition(ctx, ref, merge, true);
-
+c.jz(merge);
-            break;
-        }
-        case IR::AbstractSyntaxNode::Type::Return: {
-            c.jmp(ctx.EndLabel());
+            c.jz(merge);
             break;
         }
         case IR::AbstractSyntaxNode::Type::Unreachable: {
             c.int3();
             break;
         }
+        case IR::AbstractSyntaxNode::Type::Return:
         case IR::AbstractSyntaxNode::Type::Loop:
         case IR::AbstractSyntaxNode::Type::EndIf:
             break;
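The control-flow change above is subtle: Return nodes no longer emit a jump to the end label themselves. Instead, IsLastInst asks whether anything at or after a given position in the syntax list still lowers to machine code, and the emitter only issues the final jmp when nothing does. A standalone sketch of the predicate under simplified types (NodeType and OnlyStructuralNodesRemain are illustrative names, not the repository's):

#include <cstddef>
#include <vector>

enum class NodeType { Block, If, EndIf, Loop, Return, Unreachable };

// True when only structural nodes remain at or after pos, i.e. no further
// node produces machine code, mirroring IsLastInst in the patch above.
static bool OnlyStructuralNodesRemain(const std::vector<NodeType>& list, std::size_t pos) {
    for (; pos != list.size(); ++pos) {
        switch (list[pos]) {
        case NodeType::Return:
        case NodeType::Loop:
        case NodeType::EndIf:
            continue; // structural only, keep scanning
        default:
            return false; // a Block, If, etc. still emits code
        }
    }
    return true;
}

For a list such as {Block, If, Block, EndIf, Return}, the predicate is false at position 1 (the If and its inner block still emit code) but true at position 3 ({EndIf, Return} is purely structural), so only the final code-producing block needs the jump to the shared epilogue label.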
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp
index 62df58ae9..b610b9c8d 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp
@@ -17,4 +17,4 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx) {
 }
 
-}
\ No newline at end of file
+} // namespace Shader::Backend::X64
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
index 14d6d77ac..0a4ecc96b 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
@@ -65,7 +65,7 @@ void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& s
 
 void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
     const bool is_mem = dest[0].isMEM() && (src[0].isMEM() || src[1].isMEM());
-    Reg tmp = is_mem ? ctx.TempGPReg(false) : dest[0].getReg();
+    Reg tmp = is_mem ? ctx.TempGPReg() : dest[0].getReg();
     MovGP(ctx, tmp, src[1]);
     ctx.Code().shl(tmp, 32);
     ctx.Code().or_(tmp, src[0]);
@@ -75,7 +75,7 @@ void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& sr
 void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
     Reg src0 = src[0].isMEM() ? ctx.TempGPReg() : src[0].getReg();
     MovGP(ctx, src0, src[0]);
-    Reg dest1 = dest[1].isMEM() ? ctx.TempGPReg(false) : dest[1].getReg().changeBit(64);
+    Reg dest1 = dest[1].isMEM() ? ctx.TempGPReg() : dest[1].getReg().changeBit(64);
     MovGP(ctx, dest1, src0);
     ctx.Code().shr(dest1, 32);
     MovGP(ctx, dest[1], dest1);
@@ -83,7 +83,7 @@ void EmitUnpackUint2x32(EmitContext& ctx, const Operands&
 }
 
 void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128();
+    Xmm tmp = dest[0].isMEM() ?
ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, src[0]); ctx.Code().pinsrd(tmp, src[1], 1); MovFloat(ctx, dest[0], tmp); @@ -201,4 +201,4 @@ void EmitUnpackSint2_10_10_10(EmitContext& ctx) { throw NotImplementedException("UnpackSint2_10_10_10"); } -} \ No newline at end of file +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp index 910fd2cec..2421553bd 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp @@ -347,4 +347,4 @@ void EmitCompositeShuffleF64x4(EmitContext& ctx, const Operands& dest, const Ope MovDouble(ctx, dest[3], GetSuffleOperand<4>(composite1, composite2, idx4)); } -} \ No newline at end of file +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp index 3669b3708..169a8d85a 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp @@ -58,7 +58,7 @@ void EmitGetGotoVariable(EmitContext&) { } void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset) { - Reg& tmp = ctx.TempGPReg(false); + Reg& tmp = ctx.TempGPReg(); MovGP(ctx, tmp, base[1]); ctx.Code().shl(tmp, 32); ctx.Code().or_(tmp, base[0]); @@ -198,4 +198,4 @@ void EmitStoreBufferFormatF32(EmitContext& ctx) { throw NotImplementedException("StoreBufferFormatF32"); } -} \ No newline at end of file +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp index f9ca78432..48ebf4fa5 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp @@ -10,7 +10,7 @@ using namespace Xbyak; using namespace Xbyak::util; void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp_xmm = ctx.TempXmmReg(false); + Xmm tmp_xmm = ctx.TempXmmReg(); Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); @@ -19,21 +19,21 @@ void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttss2si(tmp, src[0]); ctx.Code().and_(tmp, 0xFFFF); MovGP(ctx, dest[0], tmp); } void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttsd2si(tmp, src[0]); ctx.Code().and_(tmp, 0xFFFF); MovGP(ctx, dest[0], tmp); } void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp_xmm = ctx.TempXmmReg(false); + Xmm tmp_xmm = ctx.TempXmmReg(); Reg tmp_reg = dest[0].isMEM() ? 
ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); @@ -41,19 +41,19 @@ void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttss2si(tmp, src[0]); MovGP(ctx, dest[0], tmp); } void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); ctx.Code().cvttsd2si(tmp, src[0]); MovGP(ctx, dest[0], tmp); } void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp_xmm = ctx.TempXmmReg(false); + Xmm tmp_xmm = ctx.TempXmmReg(); Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg(); EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); @@ -61,13 +61,13 @@ void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg(); ctx.Code().cvttss2si(tmp, src[0]); MovGP(ctx, dest[0], tmp); } void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg(); ctx.Code().cvttsd2si(tmp, src[0]); MovGP(ctx, dest[0], tmp); } @@ -125,20 +125,20 @@ void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().cvtsd2ss(tmp, src[0]); MovFloat(ctx, dest[0], tmp); } void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().cvtss2sd(tmp, src[0]); MovDouble(ctx, dest[0], tmp); } void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); - Xmm tmp_xmm = ctx.TempXmmReg(false); + Xmm tmp_xmm = ctx.TempXmmReg(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); EmitInlineF32ToF16(ctx, dest[0], tmp_xmm); @@ -146,20 +146,20 @@ void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& sr void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp_reg = dest[0].isMEM() ? 
ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); - Xmm tmp_xmm = ctx.TempXmmReg(false); + Xmm tmp_xmm = ctx.TempXmmReg(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); EmitInlineF32ToF16(ctx, dest[0], tmp_xmm); } void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = ctx.TempXmmReg(false); + Xmm tmp = ctx.TempXmmReg(); ctx.Code().cvtsi2ss(tmp, src[0]); EmitInlineF32ToF16(ctx, dest[0], tmp); } void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = ctx.TempXmmReg(false); + Xmm tmp = ctx.TempXmmReg(); ctx.Code().cvtsi2ss(tmp, src[0]); EmitInlineF32ToF16(ctx, dest[0], tmp); } @@ -181,29 +181,29 @@ void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp_reg = ctx.TempGPReg(false).cvt32(); - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg().cvt32(); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); MovFloat(ctx, dest[0], tmp_xmm); } void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp_reg = ctx.TempGPReg(false).cvt32(); - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg().cvt32(); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); MovFloat(ctx, dest[0], tmp_xmm); } void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().cvtsi2ss(tmp, src[0]); MovFloat(ctx, dest[0], tmp); } void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().cvtsi2ss(tmp, src[0]); MovFloat(ctx, dest[0], tmp); } @@ -225,29 +225,29 @@ void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp_reg = ctx.TempGPReg(false).cvt32(); - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg().cvt32(); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); MovDouble(ctx, dest[0], tmp_xmm); } void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp_reg = ctx.TempGPReg(false).cvt32(); - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg().cvt32(); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().movsx(tmp_reg, src[0]); ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); MovDouble(ctx, dest[0], tmp_xmm); } void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().cvtsi2sd(tmp, src[0]); MovDouble(ctx, dest[0], tmp); } void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().cvtsi2sd(tmp, src[0]); MovDouble(ctx, dest[0], tmp); } @@ -276,5 +276,4 @@ void EmitConvertU32U16(EmitContext& ctx, const Operands& dest, const Operands& s MovGP(ctx, dest[0], src[0]); } -} - +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp index d209b1e36..588b1ed2d 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp @@ -13,15 +13,15 @@ using namespace Xbyak::util; void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt16() : dest[0].getReg().cvt16(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt16() : dest[0].getReg().cvt16(); MovGP(ctx, tmp, src[0]); ctx.Code().and_(tmp, 0x7FFF); MovGP(ctx, dest[0], tmp); } void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg reg_tmp = ctx.TempXmmReg(false); - Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg reg_tmp = ctx.TempXmmReg(); + Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().mov(reg_tmp, 0x7FFFFFFF); ctx.Code().movd(xmm_tmp, reg_tmp); ctx.Code().andps(xmm_tmp, src[0]); @@ -29,8 +29,8 @@ void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src) { } void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg reg_tmp = ctx.TempGPReg(false); - Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Reg reg_tmp = ctx.TempGPReg(); + Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().mov(reg_tmp, 0x7FFFFFFFFFFFFFFF); ctx.Code().movq(xmm_tmp, reg_tmp); ctx.Code().andpd(xmm_tmp, src[0]); @@ -47,21 +47,21 @@ void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, co } void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op1[0]); ctx.Code().addss(tmp, op2[0]); MovFloat(ctx, dest[0], tmp); } void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op1[0]); ctx.Code().addsd(tmp, op2[0]); MovDouble(ctx, dest[0], tmp); } void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op1[0]); ctx.Code().subss(tmp, op2[0]); MovFloat(ctx, dest[0], tmp); @@ -112,7 +112,7 @@ void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, co ctx.Code().orps(tmp2, tmp1); MovFloat(ctx, dest[0], tmp2); } else { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op1[0]); ctx.Code().maxss(tmp, op2[0]); MovFloat(ctx, dest[0], tmp); @@ -120,7 +120,7 @@ void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, co } void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op1[0]); ctx.Code().maxsd(tmp, op2[0]); MovDouble(ctx, dest[0], tmp); @@ -138,7 +138,7 @@ void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, co ctx.Code().orps(tmp2, tmp1); MovFloat(ctx, dest[0], tmp2); } else { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op1[0]); ctx.Code().minss(tmp, op2[0]); MovFloat(ctx, dest[0], tmp); @@ -146,7 +146,7 @@ void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, co } void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op1[0]); ctx.Code().minsd(tmp, op2[0]); MovDouble(ctx, dest[0], tmp); @@ -162,43 +162,43 @@ void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, co } void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op1[0]); ctx.Code().mulss(tmp, op2[0]); MovFloat(ctx, dest[0], tmp); } void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op1[0]); ctx.Code().mulsd(tmp, op2[0]); MovDouble(ctx, dest[0], tmp); } void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op1[0]); ctx.Code().divss(tmp, op2[0]); MovFloat(ctx, dest[0], tmp); } void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op1[0]); ctx.Code().divsd(tmp, op2[0]); MovDouble(ctx, dest[0], tmp); } void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Reg tmp = dest[0].isMEM() ? 
ctx.TempGPReg(false).cvt16() : dest[0].getReg().cvt16(); + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt16() : dest[0].getReg().cvt16(); MovGP(ctx, tmp, op1[0]); ctx.Code().xor_(tmp, 0x8000); MovGP(ctx, dest[0], tmp); } void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); - Reg tmp_reg = ctx.TempGPReg(false).cvt32(); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg().cvt32(); ctx.Code().mov(tmp_reg, 0x80000000); ctx.Code().movd(tmp_xmm, tmp_reg); ctx.Code().xorps(tmp_xmm, op1[0]); @@ -206,8 +206,8 @@ void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& op1) { } void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); - Reg tmp_reg = ctx.TempXmmReg(false); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempXmmReg(); ctx.Code().mov(tmp_reg, 0x8000000000000000); ctx.Code().movq(tmp_xmm, tmp_reg); ctx.Code().xorpd(tmp_xmm, op1[0]); @@ -236,14 +236,14 @@ void EmitFPLog2(EmitContext& ctx) { } void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().rcpss(tmp, op1[0]); MovFloat(ctx, dest[0], tmp); } void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); - Reg tmp_reg = ctx.TempGPReg(false); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg(); ctx.Code().mov(tmp_reg, 1); ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); ctx.Code().divsd(tmp_xmm, op1[0]); @@ -251,14 +251,14 @@ void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& op1) } void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().rsqrtss(tmp, op1[0]); MovFloat(ctx, dest[0], tmp); } void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); - Reg tmp_reg = ctx.TempGPReg(false); + Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Reg tmp_reg = ctx.TempGPReg(); ctx.Code().mov(tmp_reg, 1); ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); ctx.Code().divsd(tmp_xmm, op1[0]); @@ -267,7 +267,7 @@ void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& o } void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().sqrtss(tmp, op1[0]); MovFloat(ctx, dest[0], tmp); } @@ -297,7 +297,7 @@ void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, c } void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op[0]); ctx.Code().maxss(tmp, min[0]); ctx.Code().minss(tmp, max[0]); @@ -305,7 +305,7 @@ void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, c } void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op[0]); ctx.Code().maxsd(tmp, min[0]); ctx.Code().minsd(tmp, max[0]); @@ -320,13 +320,13 @@ void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& o } void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().roundss(tmp, op1[0], 0x00); MovFloat(ctx, dest[0], tmp); } void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().roundsd(tmp, op1[0], 0x00); MovDouble(ctx, dest[0], tmp); } @@ -339,13 +339,13 @@ void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1) } void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().roundss(tmp, op1[0], 0x01); MovFloat(ctx, dest[0], tmp); } void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().roundsd(tmp, op1[0], 0x01); MovDouble(ctx, dest[0], tmp); } @@ -358,13 +358,13 @@ void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1) { } void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().roundss(tmp, op1[0], 0x02); MovFloat(ctx, dest[0], tmp); } void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); ctx.Code().roundsd(tmp, op1[0], 0x02); MovDouble(ctx, dest[0], tmp); } @@ -439,14 +439,14 @@ void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& } void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovFloat(ctx, tmp, lhs[0]); ctx.Code().ucomiss(tmp, rhs[0]); ctx.Code().sete(dest[0]); } void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? 
ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovDouble(ctx, tmp, lhs[0]); ctx.Code().ucomisd(tmp, rhs[0]); ctx.Code().sete(dest[0]); @@ -462,7 +462,7 @@ void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { Label not_nan; - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); ctx.Code().jnp(not_nan); ctx.Code().mov(dest[0], 0); ctx.Code().L(not_nan); @@ -486,14 +486,14 @@ void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operand } void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovFloat(ctx, tmp, lhs[0]); ctx.Code().ucomiss(tmp, rhs[0]); ctx.Code().setne(dest[0]); } void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovDouble(ctx, tmp, lhs[0]); ctx.Code().ucomisd(tmp, rhs[0]); ctx.Code().setne(dest[0]); @@ -533,14 +533,14 @@ void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operand } void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovFloat(ctx, tmp, lhs[0]); ctx.Code().ucomiss(tmp, rhs[0]); ctx.Code().setb(dest[0]); } void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovDouble(ctx, tmp, lhs[0]); ctx.Code().ucomisd(tmp, rhs[0]); ctx.Code().setb(dest[0]); @@ -580,14 +580,14 @@ void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Oper } void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovFloat(ctx, tmp, lhs[0]); ctx.Code().ucomiss(tmp, rhs[0]); ctx.Code().seta(dest[0]); } void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovDouble(ctx, tmp, lhs[0]); ctx.Code().ucomisd(tmp, rhs[0]); ctx.Code().seta(dest[0]); @@ -627,14 +627,14 @@ void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Op } void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? 
ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovFloat(ctx, tmp, lhs[0]); ctx.Code().ucomiss(tmp, rhs[0]); ctx.Code().setbe(dest[0]); } void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovDouble(ctx, tmp, lhs[0]); ctx.Code().ucomisd(tmp, rhs[0]); ctx.Code().setbe(dest[0]); @@ -646,7 +646,6 @@ void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const O ctx.Code().jnp(not_nan); ctx.Code().mov(dest[0], 0); ctx.Code().L(not_nan); - ctx.Code().vfpclassss(tmp1, tmp2); } void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { @@ -675,14 +674,14 @@ void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const } void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovFloat(ctx, tmp, lhs[0]); ctx.Code().ucomiss(tmp, rhs[0]); ctx.Code().setae(dest[0]); } void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg(false) : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); MovDouble(ctx, tmp, lhs[0]); ctx.Code().ucomisd(tmp, rhs[0]); ctx.Code().setae(dest[0]); @@ -696,14 +695,14 @@ void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& op) { } void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& op) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovFloat(ctx, tmp, op[0]); ctx.Code().ucomiss(tmp, tmp); ctx.Code().setp(dest[0]); } void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& op) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg() : dest[0].getReg().cvt128(); MovDouble(ctx, tmp, op[0]); ctx.Code().ucomisd(tmp, tmp); ctx.Code().setp(dest[0]); @@ -720,4 +719,5 @@ void EmitFPIsInf64(EmitContext& ctx) { void EmitFPCmpClass32(EmitContext&) { UNREACHABLE(); } -} \ No newline at end of file + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp index bc0e436e1..33b53e6ce 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp @@ -59,4 +59,4 @@ void EmitCubeFaceIndex(EmitContext& ctx) { throw NotImplementedException("CubeFaceIndex"); } -} \ No newline at end of file +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h index 9b34ff40b..5725bbc56 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h @@ -25,7 +25,7 @@ class EmitContext; void EmitPhi(EmitContext& ctx); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx); -void EmitConditionRef(EmitContext& ctx, const IR::Value& value); +void EmitConditionRef(EmitContext& ctx); void EmitReference(EmitContext&); void EmitPhiMove(EmitContext&); void EmitJoin(EmitContext& ctx); @@ -47,8 +47,8 @@ void EmitFPCmpClass32(EmitContext& ctx); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); void EmitDiscard(EmitContext& ctx); -void EmitDiscardCond(EmitContext& ctx, Id condition); -void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2, Id arg3, Id arg4); +void EmitDiscardCond(EmitContext& ctx, const Operands& condition); +void EmitDebugPrint(EmitContext& ctx); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); @@ -98,28 +98,27 @@ void EmitBufferAtomicAnd32(EmitContext& ctx); void EmitBufferAtomicOr32(EmitContext& ctx); void EmitBufferAtomicXor32(EmitContext& ctx); void EmitBufferAtomicSwap32(EmitContext& ctx); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index); -Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); -Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index); -void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index); -Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, - Id comp_index); -Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); -void EmitSetSampleMask(EmitContext& ctx, Id value); -void EmitSetFragDepth(EmitContext& ctx, Id value); -Id EmitWorkgroupId(EmitContext& ctx); -Id EmitLocalInvocationId(EmitContext& ctx); -Id EmitInvocationId(EmitContext& ctx); -Id EmitInvocationInfo(EmitContext& ctx); -Id EmitSampleId(EmitContext& ctx); -Id EmitUndefU1(EmitContext& ctx); -Id EmitUndefU8(EmitContext& ctx); -Id EmitUndefU16(EmitContext& ctx); -Id EmitUndefU32(EmitContext& ctx); -Id EmitUndefU64(EmitContext& ctx); +void EmitGetAttribute(EmitContext& ctx); +void 
EmitGetAttributeU32(EmitContext& ctx); +void EmitSetAttribute(EmitContext& ctx); +void EmitGetTessGenericAttribute(EmitContext& ctx); +void EmitSetTcsGenericAttribute(EmitContext& ctx); +void EmitReadTcsGenericOuputAttribute(EmitContext& ctx); +void EmitGetPatch(EmitContext& ctx); +void EmitSetPatch(EmitContext& ctx); +void EmitSetFragColor(EmitContext& ctx); +void EmitSetSampleMask(EmitContext& ctx); +void EmitSetFragDepth(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx); +void EmitLocalInvocationId(EmitContext& ctx); +void EmitInvocationId(EmitContext& ctx); +void EmitInvocationInfo(EmitContext& ctx); +void EmitSampleId(EmitContext& ctx); +void EmitUndefU1(EmitContext& ctx); +void EmitUndefU8(EmitContext& ctx); +void EmitUndefU16(EmitContext& ctx); +void EmitUndefU32(EmitContext& ctx); +void EmitUndefU64(EmitContext& ctx); void EmitLoadSharedU32(EmitContext& ctx); void EmitLoadSharedU64(EmitContext& ctx); void EmitWriteSharedU32(EmitContext& ctx); @@ -157,7 +156,7 @@ void EmitCompositeInsertF16x4(EmitContext& ctx, const Operands& dest, const Oper void EmitCompositeShuffleF16x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2); void EmitCompositeShuffleF16x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3); void EmitCompositeShuffleF16x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4); -void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const OpEmitFPAbs16erands& src1, const Operands& src2); +void EmitCompositeConstructF32x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2); void EmitCompositeConstructF32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3); void EmitCompositeConstructF32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4); void EmitCompositeConstructF32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2); @@ -182,14 +181,14 @@ void EmitCompositeInsertF64x4(EmitContext& ctx, const Operands& dest, const Oper void EmitCompositeShuffleF64x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2); void EmitCompositeShuffleF64x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3); void EmitCompositeShuffleF64x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4); -Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +void EmitSelectU1(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); 
+void EmitSelectU8(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectU16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectU32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectU64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectF16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectF32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); +void EmitSelectF64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value); void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& src); void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src); void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src); @@ -324,68 +323,68 @@ void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& src); void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& src); void EmitFPIsInf32(EmitContext& ctx); void EmitFPIsInf64(EmitContext& ctx); -Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitIAdd64(EmitContext& ctx, Id a, Id b); -Id EmitIAddCary32(EmitContext& ctx, Id a, Id b); -Id EmitISub32(EmitContext& ctx, Id a, Id b); -Id EmitISub64(EmitContext& ctx, Id a, Id b); -Id EmitSMulExt(EmitContext& ctx, Id a, Id b); -Id EmitUMulExt(EmitContext& ctx, Id a, Id b); -Id EmitIMul32(EmitContext& ctx, Id a, Id b); -Id EmitIMul64(EmitContext& ctx, Id a, Id b); -Id EmitSDiv32(EmitContext& ctx, Id a, Id b); -Id EmitUDiv32(EmitContext& ctx, Id a, Id b); -Id EmitSMod32(EmitContext& ctx, Id a, Id b); -Id EmitUMod32(EmitContext& ctx, Id a, Id b); -Id EmitINeg32(EmitContext& ctx, Id value); -Id EmitINeg64(EmitContext& ctx, Id value); -Id EmitIAbs32(EmitContext& ctx, Id value); -Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); -Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseAnd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); -Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); -Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); -Id EmitBitReverse32(EmitContext& ctx, Id value); -Id EmitBitCount32(EmitContext& ctx, Id value); -Id EmitBitCount64(EmitContext& ctx, Id value); -Id EmitBitwiseNot32(EmitContext& ctx, Id value); -Id EmitFindSMsb32(EmitContext& ctx, Id value); -Id EmitFindUMsb32(EmitContext& ctx, Id value); -Id 
EmitFindILsb32(EmitContext& ctx, Id value);
-Id EmitFindILsb64(EmitContext& ctx, Id value);
-Id EmitSMin32(EmitContext& ctx, Id a, Id b);
-Id EmitUMin32(EmitContext& ctx, Id a, Id b);
-Id EmitSMax32(EmitContext& ctx, Id a, Id b);
-Id EmitUMax32(EmitContext& ctx, Id a, Id b);
-Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
-Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
-Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitINotEqual32(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
-Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
-Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
-Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
-Id EmitLogicalNot(EmitContext& ctx, Id value);
+void EmitIAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitIAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitIAddCary32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitISub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitISub64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitSMulExt(EmitContext& ctx,const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitUMulExt(EmitContext& ctx,const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitIMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitIMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitSDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitUDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitSMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitUMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitINeg32(EmitContext& ctx, const Operands& dest, const Operands& op);
+void EmitINeg64(EmitContext& ctx, const Operands& dest, const Operands& op);
+void EmitIAbs32(EmitContext& ctx, const Operands& dest, const Operands& op);
+void EmitShiftLeftLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
+void EmitShiftLeftLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
+void EmitShiftRightLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
+void EmitShiftRightLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
+void EmitShiftRightArithmetic32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift);
+void 
EmitShiftRightArithmetic64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift); +void EmitBitwiseAnd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitBitwiseAnd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitBitwiseOr32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitBitwiseOr64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitBitwiseXor32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitBitFieldInsert(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& insert, const Operands& offset, const Operands& count); +void EmitBitFieldSExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count); +void EmitBitFieldUExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count); +void EmitBitReverse32(EmitContext& ctx); +void EmitBitCount32(EmitContext& ctx); +void EmitBitCount64(EmitContext& ctx); +void EmitBitwiseNot32(EmitContext& ctx, const Operands& dest, const Operands& op); +void EmitFindSMsb32(EmitContext& ctx); +void EmitFindUMsb32(EmitContext& ctx); +void EmitFindILsb32(EmitContext& ctx); +void EmitFindILsb64(EmitContext& ctx); +void EmitSMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitUMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitSMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitUMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitSClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max); +void EmitUClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max); +void EmitSLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitSLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitULessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitULessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitIEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitIEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitSLessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitULessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitSGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitUGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitINotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitINotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const 
Operands& rhs);
+void EmitLogicalOr(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitLogicalAnd(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitLogicalXor(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2);
+void EmitLogicalNot(EmitContext& ctx, const Operands& dest, const Operands& op);
 void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
 void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src);
 void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src);
@@ -461,17 +460,17 @@ void EmitImageAtomicAnd32(EmitContext& ctx);
 void EmitImageAtomicOr32(EmitContext& ctx);
 void EmitImageAtomicXor32(EmitContext& ctx);
 void EmitImageAtomicExchange32(EmitContext& ctx);
-Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
-Id EmitLaneId(EmitContext& ctx);
-Id EmitWarpId(EmitContext& ctx);
-Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
-Id EmitReadFirstLane(EmitContext& ctx, Id value);
-Id EmitReadLane(EmitContext& ctx, Id value, u32 lane);
-Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
+void EmitCubeFaceIndex(EmitContext& ctx);
+void EmitLaneId(EmitContext& ctx);
+void EmitWarpId(EmitContext& ctx);
+void EmitQuadShuffle(EmitContext& ctx);
+void EmitReadFirstLane(EmitContext& ctx);
+void EmitReadLane(EmitContext& ctx);
+void EmitWriteLane(EmitContext& ctx);
 void EmitDataAppend(EmitContext& ctx);
 void EmitDataConsume(EmitContext& ctx);
 void EmitEmitVertex(EmitContext& ctx);
 void EmitEmitPrimitive(EmitContext& ctx);
-}
\ No newline at end of file
+} // namespace Shader::Backend::X64
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp
new file mode 100644
index 000000000..2cc3b7c7e
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp
@@ -0,0 +1,502 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+#include "shader_recompiler/backend/asm_x64/x64_utils.h"
+
+namespace Shader::Backend::X64 {
+
+using namespace Xbyak;
+using namespace Xbyak::util;
+
+namespace {
+
+static bool EmitSaveRegTemp(EmitContext& ctx, const Reg& save, const Operand& dest) {
+    if (dest.getIdx() == save.getIdx()) {
+        // The destination is this very register; it will be overwritten anyway,
+        // so there is nothing to preserve.
+        return false;
+    }
+    ctx.Code().push(save);
+    return true;
+}
+
+static void EmitRestoreRegTemp(EmitContext& ctx, const Reg& save) {
+    ctx.Code().pop(save);
+}
+
+} // namespace
+
+void EmitIAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    if (dest[0].isREG() && op1[0].isREG() && op2[0].isREG()) {
+        ctx.Code().lea(dest[0].getReg(), ptr[op1[0].getReg() + op2[0].getReg()]);
+    } else {
+        Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+        MovGP(ctx, tmp, op1[0]);
+        ctx.Code().add(tmp, op2[0]);
+        MovGP(ctx, dest[0], tmp);
+    }
+}
+
+void EmitIAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    if (dest[0].isREG() && op1[0].isREG() && op2[0].isREG()) {
+        ctx.Code().lea(dest[0].getReg(), ptr[op1[0].getReg() + op2[0].getReg()]);
+    } else {
+        Reg tmp = dest[0].isMEM() ? 
ctx.TempGPReg(false) : dest[0].getReg();
+        MovGP(ctx, tmp, op1[0]);
+        ctx.Code().add(tmp, op2[0]);
+        MovGP(ctx, dest[0], tmp);
+    }
+}
+
+void EmitIAddCary32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    Operand carry = dest[1];
+    carry.setBit(1);
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().add(tmp, op2[0]);
+    ctx.Code().setc(carry);
+    // Store the 32-bit sum as well; when dest[0] lives in memory tmp is only a
+    // temporary register.
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitISub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().sub(tmp, op2[0]);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitISub64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().sub(tmp, op2[0]);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitSMulExt(EmitContext& ctx) {
+    throw NotImplementedException("SMulExtended");
+}
+
+void EmitUMulExt(EmitContext& ctx) {
+    throw NotImplementedException("UMulExtended");
+}
+
+void EmitIMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().imul(tmp, op2[0]);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitIMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().imul(tmp, op2[0]);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitSDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
+    bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
+    // The divisor needs a register of its own: idiv implicitly consumes edx:eax,
+    // so a divisor living in memory, rax or rdx is moved to a temporary.
+    Reg tmp = op2[0].isREG() ? op2[0].getReg().cvt32() : ctx.TempGPReg().cvt32();
+    while (tmp.getIdx() == rax.getIdx() || tmp.getIdx() == rdx.getIdx()) {
+        tmp = ctx.TempGPReg().cvt32();
+    }
+    MovGP(ctx, tmp, op2[0]);
+    MovGP(ctx, eax, op1[0]);
+    // Sign-extend the dividend into edx:eax before the signed division.
+    ctx.Code().cdq();
+    ctx.Code().idiv(tmp);
+    MovGP(ctx, dest[0], eax);
+    if (rdx_saved) {
+        EmitRestoreRegTemp(ctx, rdx);
+    }
+    if (rax_saved) {
+        EmitRestoreRegTemp(ctx, rax);
+    }
+}
+
+void EmitUDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
+    bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
+    Reg tmp = op2[0].isREG() ? op2[0].getReg().cvt32() : ctx.TempGPReg().cvt32();
+    while (tmp.getIdx() == rax.getIdx() || tmp.getIdx() == rdx.getIdx()) {
+        tmp = ctx.TempGPReg().cvt32();
+    }
+    MovGP(ctx, tmp, op2[0]);
+    MovGP(ctx, eax, op1[0]);
+    // Zero edx so edx:eax holds the zero-extended dividend.
+    ctx.Code().xor_(edx, edx);
+    ctx.Code().div(tmp);
+    MovGP(ctx, dest[0], eax);
+    if (rdx_saved) {
+        EmitRestoreRegTemp(ctx, rdx);
+    }
+    if (rax_saved) {
+        EmitRestoreRegTemp(ctx, rax);
+    }
+}
+
+void EmitSMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
+    bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]);
+    Reg tmp = op2[0].isREG() ? op2[0].getReg().cvt32() : ctx.TempGPReg().cvt32();
+    while (tmp.getIdx() == rax.getIdx() || tmp.getIdx() == rdx.getIdx()) {
+        tmp = ctx.TempGPReg().cvt32();
+    }
+    MovGP(ctx, tmp, op2[0]);
+    MovGP(ctx, eax, op1[0]);
+    // Sign-extend the dividend into edx:eax; the remainder lands in edx.
+    ctx.Code().cdq();
+    ctx.Code().idiv(tmp);
+    MovGP(ctx, dest[0], edx);
+    if (rdx_saved) {
+        EmitRestoreRegTemp(ctx, rdx);
+    }
+    if (rax_saved) {
+        EmitRestoreRegTemp(ctx, rax);
+    }
+}
+
+void EmitUMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]);
+    bool rdx_saved = 
EmitSaveRegTemp(ctx, rdx, dest[0]);
+    Reg tmp = op2[0].isREG() ? op2[0].getReg().cvt32() : ctx.TempGPReg().cvt32();
+    while (tmp.getIdx() == rax.getIdx() || tmp.getIdx() == rdx.getIdx()) {
+        tmp = ctx.TempGPReg().cvt32();
+    }
+    MovGP(ctx, tmp, op2[0]);
+    MovGP(ctx, eax, op1[0]);
+    // Zero edx so edx:eax holds the zero-extended dividend; the remainder lands in edx.
+    ctx.Code().xor_(edx, edx);
+    ctx.Code().div(tmp);
+    MovGP(ctx, dest[0], edx);
+    if (rdx_saved) {
+        EmitRestoreRegTemp(ctx, rdx);
+    }
+    if (rax_saved) {
+        EmitRestoreRegTemp(ctx, rax);
+    }
+}
+
+void EmitINeg32(EmitContext& ctx, const Operands& dest, const Operands& op) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, op[0]);
+    ctx.Code().neg(tmp);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitINeg64(EmitContext& ctx, const Operands& dest, const Operands& op) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg();
+    MovGP(ctx, tmp, op[0]);
+    ctx.Code().neg(tmp);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitIAbs32(EmitContext& ctx, const Operands& dest, const Operands& op) {
+    Label done;
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, op[0]);
+    ctx.Code().cmp(tmp, 0);
+    ctx.Code().jns(done);
+    ctx.Code().neg(tmp);
+    ctx.Code().L(done);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitShiftLeftLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
+    bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
+    // The shift count is passed in cl, so the shifted value must not live in
+    // memory or alias rcx.
+    Reg tmp = dest[0].isMEM() || dest[0].getIdx() == rcx.getIdx() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, base[0]);
+    MovGP(ctx, cl, shift[0]);
+    ctx.Code().shl(tmp, cl);
+    MovGP(ctx, dest[0], tmp);
+    if (rcx_saved) {
+        EmitRestoreRegTemp(ctx, rcx);
+    }
+}
+
+void EmitShiftLeftLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
+    bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
+    Reg tmp = dest[0].isMEM() || dest[0].getIdx() == rcx.getIdx() ? ctx.TempGPReg(false) : dest[0].getReg();
+    MovGP(ctx, tmp, base[0]);
+    MovGP(ctx, cl, shift[0]);
+    ctx.Code().shl(tmp, cl);
+    MovGP(ctx, dest[0], tmp);
+    if (rcx_saved) {
+        EmitRestoreRegTemp(ctx, rcx);
+    }
+}
+
+void EmitShiftRightLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
+    bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
+    Reg tmp = dest[0].isMEM() || dest[0].getIdx() == rcx.getIdx() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, base[0]);
+    MovGP(ctx, cl, shift[0]);
+    ctx.Code().shr(tmp, cl);
+    MovGP(ctx, dest[0], tmp);
+    if (rcx_saved) {
+        EmitRestoreRegTemp(ctx, rcx);
+    }
+}
+
+void EmitShiftRightLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
+    bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
+    Reg tmp = dest[0].isMEM() || dest[0].getIdx() == rcx.getIdx() ? ctx.TempGPReg(false) : dest[0].getReg();
+    MovGP(ctx, tmp, base[0]);
+    MovGP(ctx, cl, shift[0]);
+    ctx.Code().shr(tmp, cl);
+    MovGP(ctx, dest[0], tmp);
+    if (rcx_saved) {
+        EmitRestoreRegTemp(ctx, rcx);
+    }
+}
+
+void EmitShiftRightArithmetic32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
+    bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
+    Reg tmp = dest[0].isMEM() || dest[0].getIdx() == rcx.getIdx() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, base[0]);
+    MovGP(ctx, cl, shift[0]);
+    ctx.Code().sar(tmp, cl);
+    MovGP(ctx, dest[0], tmp);
+    if (rcx_saved) {
+        EmitRestoreRegTemp(ctx, rcx);
+    }
+}
+
+void EmitShiftRightArithmetic64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) {
+    bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]);
+    Reg tmp = dest[0].isMEM() || dest[0].getIdx() == rcx.getIdx() ? 
ctx.TempGPReg(false) : dest[0].getReg(); + MovGP(ctx, tmp, base[0]); + MovGP(ctx, cl, shift[0]); + ctx.Code().sar(tmp, cl); + MovGP(ctx, dest[0], tmp); + if (rcx_saved) { + EmitRestoreRegTemp(ctx, rcx); + } +} + +void EmitBitwiseAnd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().and_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitBitwiseAnd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().and_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitBitwiseOr32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().or_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitBitwiseOr64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().or_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitBitwiseXor32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().xor_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitBitFieldInsert(EmitContext& ctx) { + throw NotImplementedException("BitFieldInsert"); +} + +void EmitBitFieldSExtract(EmitContext& ctx) { + throw NotImplementedException("BitFieldSExtract"); +} + +void EmitBitFieldUExtract(EmitContext& ctx) { + throw NotImplementedException("BitFieldUExtract"); +} + +void EmitBitReverse32(EmitContext& ctx) { + throw NotImplementedException("BitReverse32"); +} + +void EmitBitCount32(EmitContext& ctx) { + throw NotImplementedException("BitCount32"); +} + +void EmitBitCount64(EmitContext& ctx) { + throw NotImplementedException("BitCount64"); +} + +void EmitBitwiseNot32(EmitContext& ctx, const Operands& dest, const Operands& op) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op[0]); + ctx.Code().not_(tmp); + MovGP(ctx, dest[0], tmp); +} + +void EmitFindSMsb32(EmitContext& ctx) { + throw NotImplementedException("FindSMsb32"); +} + +void EmitFindUMsb32(EmitContext& ctx) { + throw NotImplementedException("FindUMsb32"); +} + +void EmitFindILsb32(EmitContext& ctx) { + throw NotImplementedException("FindILsb32"); +} + +void EmitFindILsb64(EmitContext& ctx) { + throw NotImplementedException("FindILsb64"); +} + +void EmitSMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().cmp(tmp, op2[0]); + ctx.Code().cmovg(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitUMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? 
ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().cmp(tmp, op2[0]);
+    ctx.Code().cmova(tmp, op2[0]);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitSMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().cmp(tmp, op2[0]);
+    ctx.Code().cmovl(tmp, op2[0]);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitUMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().cmp(tmp, op2[0]);
+    ctx.Code().cmovb(tmp, op2[0]);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitSClamp32(EmitContext& ctx, const Operands& dest, const Operands& value, const Operands& min, const Operands& max) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, value[0]);
+    ctx.Code().cmp(tmp, min[0]);
+    ctx.Code().cmovl(tmp, min[0]);
+    ctx.Code().cmp(tmp, max[0]);
+    ctx.Code().cmovg(tmp, max[0]);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitUClamp32(EmitContext& ctx, const Operands& dest, const Operands& value, const Operands& min, const Operands& max) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32();
+    MovGP(ctx, tmp, value[0]);
+    ctx.Code().cmp(tmp, min[0]);
+    ctx.Code().cmovb(tmp, min[0]);
+    ctx.Code().cmp(tmp, max[0]);
+    ctx.Code().cmova(tmp, max[0]);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitSLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    // A memory operand carries no register to reuse, so a memory lhs is loaded
+    // into a temporary before the compare.
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().setl(dest[0]);
+}
+
+void EmitSLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false) : lhs[0].getReg();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().setl(dest[0]);
+}
+
+void EmitULessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().setb(dest[0]);
+}
+
+void EmitULessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false) : lhs[0].getReg();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().setb(dest[0]);
+}
+
+void EmitIEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().sete(dest[0]);
+}
+
+void EmitIEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false) : lhs[0].getReg();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().sete(dest[0]);
+}
+
+void EmitSLessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? 
ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().setle(dest[0]);
+}
+
+void EmitULessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().setbe(dest[0]);
+}
+
+void EmitSGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().setg(dest[0]);
+}
+
+void EmitUGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().seta(dest[0]);
+}
+
+void EmitINotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().setne(dest[0]);
+}
+
+void EmitINotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false) : lhs[0].getReg();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().setne(dest[0]);
+}
+
+void EmitSGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().setge(dest[0]);
+}
+
+void EmitUGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
+    Reg tmp = lhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32();
+    MovGP(ctx, tmp, lhs[0]);
+    ctx.Code().cmp(tmp, rhs[0]);
+    ctx.Code().setae(dest[0]);
+}
+
+} // namespace Shader::Backend::X64
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
new file mode 100644
index 000000000..30ec2eeeb
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
@@ -0,0 +1,40 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+#include "shader_recompiler/backend/asm_x64/x64_utils.h"
+
+namespace Shader::Backend::X64 {
+
+using namespace Xbyak;
+using namespace Xbyak::util;
+
+void EmitLogicalOr(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt8() : dest[0].getReg().cvt8();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().or_(tmp, op2[0]);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitLogicalAnd(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
+    Reg tmp = dest[0].isMEM() ? 
ctx.TempGPReg().cvt8() : dest[0].getReg().cvt8(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().and_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitLogicalXor(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt8() : dest[0].getReg().cvt8(); + MovGP(ctx, tmp, op1[0]); + ctx.Code().xor_(tmp, op2[0]); + MovGP(ctx, dest[0], tmp); +} + +void EmitLogicalNot(EmitContext& ctx, const Operands& dest, const Operands& op) { + Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt8() : dest[0].getReg().cvt8(); + MovGP(ctx, tmp, op[0]); + ctx.Code().not_(tmp); + MovGP(ctx, dest[0], tmp); +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp new file mode 100644 index 000000000..56ecaee03 --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" +#include "shader_recompiler/backend/asm_x64/x64_utils.h" + +namespace Shader::Backend::X64 { + +using namespace Xbyak; +using namespace Xbyak::util; + +void EmitSelectU1(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + Label false_label, end_label; + Reg tmp = cond[0].isMEM() ? ctx.TempGPReg().cvt8() : cond[0].getReg().cvt8(); + MovGP(ctx, tmp, cond[0]); + ctx.Code().test(tmp, tmp); + ctx.Code().jz(false_label); + MovGP(ctx, dest[0], true_value[0]); + ctx.Code().jmp(end_label); + ctx.Code().L(false_label); + MovGP(ctx, dest[0], false_value[0]); + ctx.Code().L(end_label); +} + +void EmitSelectU8(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + EmitSelectU1(ctx, dest, cond, true_value, false_value); +} + +void EmitSelectU16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + EmitSelectU1(ctx, dest, cond, true_value, false_value); +} + +void EmitSelectU32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + EmitSelectU1(ctx, dest, cond, true_value, false_value); +} + +void EmitSelectU64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + EmitSelectU1(ctx, dest, cond, true_value, false_value); +} + +void EmitSelectF16(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + EmitSelectU1(ctx, dest, cond, true_value, false_value); +} + +void EmitSelectF32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + Label false_label, end_label; + Reg tmp = cond[0].isMEM() ? 
ctx.TempGPReg().cvt8() : cond[0].getReg().cvt8(); + MovGP(ctx, tmp, cond[0]); + ctx.Code().test(tmp, tmp); + ctx.Code().jz(false_label); + MovFloat(ctx, dest[0], true_value[0]); + ctx.Code().jmp(end_label); + ctx.Code().L(false_label); + MovFloat(ctx, dest[0], false_value[0]); + ctx.Code().L(end_label); +} + +void EmitSelectF64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { + Label false_label, end_label; + Reg tmp = cond[0].isMEM() ? ctx.TempGPReg().cvt8() : cond[0].getReg().cvt8(); + MovGP(ctx, tmp, cond[0]); + ctx.Code().test(tmp, tmp); + ctx.Code().jz(false_label); + MovDouble(ctx, dest[0], true_value[0]); + ctx.Code().jmp(end_label); + ctx.Code().L(false_label); + MovDouble(ctx, dest[0], false_value[0]); + ctx.Code().L(end_label); +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp new file mode 100644 index 000000000..acae51f66 --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" +#include "shader_recompiler/backend/asm_x64/x64_utils.h" + +namespace Shader::Backend::X64 { + +using namespace Xbyak; +using namespace Xbyak::util; + +void EmitPrologue(EmitContext& ctx) { + ctx.Prologue(); +} + +void ConvertDepthMode(EmitContext& ctx) { + +} + +void ConvertPositionToClipSpace(EmitContext& ctx) { + +} + +void EmitEpilogue(EmitContext& ctx) { + ctx.SetEndFlag(); +} + +void EmitDiscard(EmitContext& ctx) { + ctx.SetEndFlag(); +} + +void EmitDiscardCond(EmitContext& ctx, const Operands& condition) { + Reg tmp = condition[0].isMEM() ? 
ctx.TempGPReg().cvt8() : condition[0].getReg().cvt8(); + MovGP(ctx, tmp, condition[0]); + ctx.Code().test(tmp, tmp); + ctx.Code().jnz(ctx.EndLabel()); +} + +void EmitEmitVertex(EmitContext& ctx) { + +} + +void EmitEmitPrimitive(EmitContext& ctx) { + +} + +void EmitEndPrimitive(EmitContext& ctx) { + +} + +void EmitDebugPrint(EmitContext& ctx) { + +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp new file mode 100644 index 000000000..b1f87d61f --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_undefined.cpp @@ -0,0 +1,28 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" + +namespace Shader::Backend::X64 { + +void EmitUndefU1(EmitContext& ctx) { + UNREACHABLE_MSG("x64 Instruction"); +} + +void EmitUndefU8(EmitContext&) { + UNREACHABLE_MSG("x64 Instruction"); +} + +void EmitUndefU16(EmitContext&) { + UNREACHABLE_MSG("x64 Instruction"); +} + +void EmitUndefU32(EmitContext& ctx) { + UNREACHABLE_MSG("x64 Instruction"); +} + +void EmitUndefU64(EmitContext&) { + UNREACHABLE_MSG("x64 Instruction"); +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp new file mode 100644 index 000000000..1498345de --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_warp.cpp @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" + +namespace Shader::Backend::X64 { + +void EmitWarpId(EmitContext& ctx) { + +} + +void EmitLaneId(EmitContext& ctx) { + +} + +void EmitQuadShuffle(EmitContext& ctx) { + +} + +void EmitReadFirstLane(EmitContext& ctx) { + +} + +void EmitReadLane(EmitContext& ctx) { + +} + +void EmitWriteLane(EmitContext& ctx) { + +} + +} // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp index 5dd7e0b6c..4aaea8cd4 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp @@ -58,6 +58,11 @@ void EmitContext::PopTempXmmReg() { temp_xmm_reg_index--; } +void EmitContext::ResetTempRegs() { + temp_gp_reg_index = 0; + temp_xmm_reg_index = 0; +} + const Operands& EmitContext::Def(IR::Inst* inst) { return inst_to_operands.at(inst); } @@ -135,11 +140,6 @@ EmitContext::PhiAssignments(IR::Block* block) const { return std::nullopt; } -void EmitContext::ResetTempRegs() { - temp_gp_reg_index = 0; - temp_xmm_reg_index = 0; -} - void EmitContext::Prologue() { if (inst_stack_space > 0) { code.sub(rsp, inst_stack_space); diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h index eab5bad70..5c907f0ca 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h @@ -38,10 +38,21 @@ public: return block_labels.at(block); } + void SetEndFlag() { + end_flag = true; + } + + [[nodiscard]] bool EndFlag() { + bool flag = end_flag; + end_flag = false; + return flag; + } + 
[[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true);
     [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true);
     void PopTempGPReg();
     void PopTempXmmReg();
+    void ResetTempRegs();
 
     [[nodiscard]] const Xbyak::Reg64& UserData() const {return Xbyak::util::r11;}
@@ -50,7 +61,6 @@ public:
     [[nodiscard]] std::optional> PhiAssignments(IR::Block* block) const;
 
-    void ResetTempRegs();
     void Prologue();
     void Epilogue();
@@ -108,6 +118,9 @@ private:
     boost::container::small_flat_map block_labels;
     Xbyak::Label end_label;
 
+    // End flag, used to defer jump to end label
+    bool end_flag = false;
+
     void SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval, ActiveIntervalList& active_intervals);
     void AdjustInstInterval(InstInterval& interval, const FlatInstList& insts);
diff --git a/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp b/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp
index 4e5f29e73..dacdaae14 100644
--- a/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp
+++ b/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp
@@ -17,7 +17,8 @@ void DoIAdd64(ImmValueList& inst_values, const ImmValueList& args0, const ImmVal
 }
 
 void DoIAddCary32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
-    UNREACHABLE_MSG("IAddCary32 not implemented");
+    Common::CartesianInvoke(ImmValue::AddCarry<IR::Type::U32>,
+                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
 }
 
 void DoISub32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
diff --git a/src/shader_recompiler/ir/compute_value/imm_value.cpp b/src/shader_recompiler/ir/compute_value/imm_value.cpp
index c9ebf1519..2000bdfba 100644
--- a/src/shader_recompiler/ir/compute_value/imm_value.cpp
+++ b/src/shader_recompiler/ir/compute_value/imm_value.cpp
@@ -385,6 +385,62 @@ ImmValue ImmValue::Add<IR::Type::F64x4>(const ImmValue& a, const ImmValue& b) noexcept {
                     a.imm_values[3].imm_f64 + b.imm_values[3].imm_f64);
 }
 
+template <>
+ImmValue ImmValue::AddCarry<IR::Type::U8>(const ImmValue& a, const ImmValue& b) noexcept {
+    u8 result = a.imm_values[0].imm_u8 + b.imm_values[0].imm_u8;
+    u8 carry = (result < a.imm_values[0].imm_u8) ? 1 : 0;
+    return ImmValue(result, carry);
+}
+
+template <>
+ImmValue ImmValue::AddCarry<IR::Type::S8>(const ImmValue& a, const ImmValue& b) noexcept {
+    u8 result = a.imm_values[0].imm_u8 + b.imm_values[0].imm_u8;
+    u8 carry = (result < a.imm_values[0].imm_u8) ? 1 : 0;
+    return ImmValue(result, carry);
+}
+
+template <>
+ImmValue ImmValue::AddCarry<IR::Type::U16>(const ImmValue& a, const ImmValue& b) noexcept {
+    u16 result = a.imm_values[0].imm_u16 + b.imm_values[0].imm_u16;
+    u16 carry = (result < a.imm_values[0].imm_u16) ? 1 : 0;
+    return ImmValue(result, carry);
+}
+
+template <>
+ImmValue ImmValue::AddCarry<IR::Type::S16>(const ImmValue& a, const ImmValue& b) noexcept {
+    s16 result = a.imm_values[0].imm_s16 + b.imm_values[0].imm_s16;
+    s16 carry = (result < a.imm_values[0].imm_s16) ? 1 : 0;
+    return ImmValue(result, carry);
+}
+
+template <>
+ImmValue ImmValue::AddCarry<IR::Type::U32>(const ImmValue& a, const ImmValue& b) noexcept {
+    u32 result = a.imm_values[0].imm_u32 + b.imm_values[0].imm_u32;
+    u32 carry = (result < a.imm_values[0].imm_u32) ? 1 : 0;
+    return ImmValue(result, carry);
+}
+
+template <>
+ImmValue ImmValue::AddCarry<IR::Type::S32>(const ImmValue& a, const ImmValue& b) noexcept {
+    s32 result = a.imm_values[0].imm_s32 + b.imm_values[0].imm_s32;
+    s32 carry = (result < a.imm_values[0].imm_s32) ? 1 : 0;
+    return ImmValue(result, carry);
+}
+
+template <>
+ImmValue ImmValue::AddCarry<IR::Type::U64>(const ImmValue& a, const ImmValue& b) noexcept {
+    u64 result = a.imm_values[0].imm_u64 + b.imm_values[0].imm_u64;
+    u64 carry = (result < a.imm_values[0].imm_u64) ? 1 : 0;
+    return ImmValue(result, carry);
+}
+
+template <>
+ImmValue ImmValue::AddCarry<IR::Type::S64>(const ImmValue& a, const ImmValue& b) noexcept {
+    s64 result = a.imm_values[0].imm_s64 + b.imm_values[0].imm_s64;
+    s64 carry = (result < a.imm_values[0].imm_s64) ? 1 : 0;
+    return ImmValue(result, carry);
+}
+
 template <>
 ImmValue ImmValue::Sub<IR::Type::U8>(const ImmValue& a, const ImmValue& b) noexcept {
     return ImmValue(a.imm_values[0].imm_u8 - b.imm_values[0].imm_u8,
diff --git a/src/shader_recompiler/ir/compute_value/imm_value.h b/src/shader_recompiler/ir/compute_value/imm_value.h
index a46712ce5..800ee4b16 100644
--- a/src/shader_recompiler/ir/compute_value/imm_value.h
+++ b/src/shader_recompiler/ir/compute_value/imm_value.h
@@ -97,6 +97,9 @@ public:
     template <IR::Type type>
     [[nodiscard]] static ImmValue Add(const ImmValue& a, const ImmValue& b) noexcept;
 
+    template <IR::Type type>
+    [[nodiscard]] static ImmValue AddCarry(const ImmValue& a, const ImmValue& b) noexcept;
+
     template <IR::Type type>
     [[nodiscard]] static ImmValue Sub(const ImmValue& a, const ImmValue& b) noexcept;
 
From 5307eeca020b88410ee2f7f39c21bba51d0739f9 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Sun, 6 Apr 2025 20:56:48 +0200
Subject: [PATCH 41/49] Add break conditions to subprogram

---
 src/shader_recompiler/ir/subprogram.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/shader_recompiler/ir/subprogram.cpp b/src/shader_recompiler/ir/subprogram.cpp
index bb944e3ef..7be3622d4 100644
--- a/src/shader_recompiler/ir/subprogram.cpp
+++ b/src/shader_recompiler/ir/subprogram.cpp
@@ -124,7 +124,15 @@ void SubProgram::AddPhi(Inst* orig_phi, Inst* phi) {
         if (cond->asl_node->type == AbstractSyntaxNode::Type::If) {
             AddInst(cond->asl_node->data.if_node.cond.InstRecursive());
         } else if (cond->asl_node->type == AbstractSyntaxNode::Type::Loop) {
-            AddInst(&cond->asl_node->data.loop.continue_block->back());
+            // In the case of a loop, we need to add the loop itself and also
+            // the break conditions.
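+            // Each break path ends with a ConditionRef at the back of one of the
+            // merge block's predecessors; those are the instructions the loop
+            // below pulls into the subprogram.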
+ Block* loop_merge = cond->asl_node->data.loop.merge; + for (Block* pred : loop_merge->ImmPredecessors()) { + if (pred->CondData().asl_node == cond->asl_node) { + ASSERT(pred->back().Type() == Inst::Type::ConditionRef); + AddInst(pred->back().InstRecursive()); + } + } } if (orig_phi->GetParent()->CondData().asl_node == cond->asl_node) { break; From f516ab2dec9b6c8c570738b49efc5331217aeb88 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sun, 6 Apr 2025 21:13:45 +0200 Subject: [PATCH 42/49] Fix build --- src/shader_recompiler/ir/subprogram.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/ir/subprogram.cpp b/src/shader_recompiler/ir/subprogram.cpp index 7be3622d4..757420da2 100644 --- a/src/shader_recompiler/ir/subprogram.cpp +++ b/src/shader_recompiler/ir/subprogram.cpp @@ -129,8 +129,8 @@ void SubProgram::AddPhi(Inst* orig_phi, Inst* phi) { Block* loop_merge = cond->asl_node->data.loop.merge; for (Block* pred : loop_merge->ImmPredecessors()) { if (pred->CondData().asl_node == cond->asl_node) { - ASSERT(pred->back().Type() == Inst::Type::ConditionRef); - AddInst(pred->back().InstRecursive()); + ASSERT(pred->back().GetOpcode() == IR::Opcode::ConditionRef); + AddInst(&pred->back()); } } } From 4fec1c7fce2cc3f61121c88c4020e955cd2e1c4f Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 7 Apr 2025 11:07:14 +0200 Subject: [PATCH 43/49] x64 fixes --- .../backend/asm_x64/emit_x64.cpp | 5 +- .../asm_x64/emit_x64_context_get_set.cpp | 8 +- .../backend/asm_x64/emit_x64_instructions.h | 10 +- .../backend/asm_x64/x64_emit_context.cpp | 53 +++++----- .../backend/asm_x64/x64_emit_context.h | 2 +- .../backend/asm_x64/x64_utils.cpp | 99 +++++-------------- .../backend/asm_x64/x64_utils.h | 25 ++++- .../passes/flatten_extended_userdata_pass.cpp | 3 +- 8 files changed, 88 insertions(+), 117 deletions(-) diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp index 9464bd36b..254b69ffe 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp @@ -107,7 +107,8 @@ void EmitInst(EmitContext& ctx, IR::Inst* inst) { switch (inst->GetOpcode()) { #define OPCODE(name, result_type, ...) 
\ case IR::Opcode::name: \ - Invoke<&Emit##name, IR::Type::result_type != IR::Type::Void>(ctx, inst); + Invoke<&Emit##name, IR::Type::result_type != IR::Type::Void>(ctx, inst); \ + return; #include "shader_recompiler/ir/opcodes.inc" #undef OPCODE } @@ -138,6 +139,8 @@ void Traverse(EmitContext& ctx, const IR::Program& program) { IR::Block* block = node.data.block; c.L(ctx.BlockLabel(block)); for (IR::Inst& inst : *block) { + ctx.ResetTempRegs(); + EmitInst(ctx, &inst); } const auto& phi_assignments = ctx.PhiAssignments(block); if (phi_assignments) { diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp index 169a8d85a..1eea0e7ee 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp @@ -14,7 +14,7 @@ void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg) const u32 offset = static_cast(reg) << 2; Reg& tmp = ctx.TempGPReg(); ctx.Code().lea(tmp, ptr[ctx.UserData() + offset]); - MovGP( ctx, dest[0], ptr[tmp]); + MovGP( ctx, dest[0], dword[tmp]); } void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) { @@ -22,7 +22,7 @@ void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& v MovGP(ctx, tmp, offset[0]); ctx.Code().shl(tmp, 2); ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]); - MovGP(ctx, ptr[tmp], value[0]); + MovGP(ctx, dword[tmp], value[0]); } void EmitGetThreadBitScalarReg(EmitContext& ctx) { @@ -65,9 +65,9 @@ void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, if (offset[0].isMEM()) { ctx.Code().add(tmp, offset[0]); } else { - ctx.Code().lea(tmp, ptr[tmp + offset[0].getReg()]); + ctx.Code().lea(tmp, ptr[tmp + offset[0].getReg().cvt64()]); } - MovGP(ctx, dest[0], ptr[tmp]); + MovGP(ctx, dest[0], dword[tmp]); } void EmitReadConstBuffer(EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h index 5725bbc56..0e88727b2 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h @@ -328,8 +328,8 @@ void EmitIAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, con void EmitIAddCary32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); void EmitISub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); void EmitISub64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); -void EmitSMulExt(EmitContext& ctx,const Operands& dest, const Operands& op1, const Operands& op2); -void EmitUMulExt(EmitContext& ctx,const Operands& dest, const Operands& op1, const Operands& op2); +void EmitSMulExt(EmitContext& ctx); +void EmitUMulExt(EmitContext& ctx); void EmitIMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); void EmitIMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); void EmitSDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); @@ -350,9 +350,9 @@ void EmitBitwiseAnd64(EmitContext& ctx, const Operands& dest, const Operands& op void EmitBitwiseOr32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); void EmitBitwiseOr64(EmitContext& ctx, const Operands& dest, const Operands& op1, 
const Operands& op2); void EmitBitwiseXor32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); -void EmitBitFieldInsert(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& insert, const Operands& offset, const Operands& count); -void EmitBitFieldSExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count); -void EmitBitFieldUExtract(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset, const Operands& count); +void EmitBitFieldInsert(EmitContext& ctx); +void EmitBitFieldSExtract(EmitContext& ctx); +void EmitBitFieldUExtract(EmitContext& ctx); void EmitBitReverse32(EmitContext& ctx); void EmitBitCount32(EmitContext& ctx); void EmitBitCount64(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp index 4aaea8cd4..0b03b2e75 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp @@ -125,7 +125,7 @@ Operands EmitContext::Def(const IR::Value& value) { code.mov(operands.back(), std::bit_cast(value.Patch())); break; default: - UNREACHABLE_MSG("Unsupported value type: %s", IR::NameOf(value.Type())); + UNREACHABLE_MSG("Unsupported value type: {}", IR::NameOf(value.Type())); break; } return operands; @@ -173,17 +173,17 @@ void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& inte current_sp += ctx.free_stack_slots.back(); ctx.free_stack_slots.pop_back(); } - switch (GetRegBytesOfType(inst->Type())) { - case 8: + switch (GetRegBytesOfType(IR::Value(inst))) { + case 1: return byte[r11 + current_sp]; - case 16: + case 2: return word[r11 + current_sp]; - case 32: + case 4: return dword[r11 + current_sp]; - case 64: + case 8: return qword[r11 + current_sp]; default: - UNREACHABLE_MSG("Unsupported register size: %zu", GetRegBytesOfType(inst->Type())); + UNREACHABLE_MSG("Unsupported register size: {}", GetRegBytesOfType(inst)); return {}; } }; @@ -197,7 +197,7 @@ void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& inte Operands& operands = inst_to_operands[spill_candidate->inst]; Reg reg = operands[spill_candidate->component].getReg(); inst_to_operands[interval.inst][interval.component] = - reg.isXMM() ? reg : ResizeRegToType(reg, interval.inst->Type()); + reg.isXMM() ? 
reg : ResizeRegToType(reg, interval.inst); operands[spill_candidate->component] = get_operand(spill_candidate->inst); ctx.active_spill_intervals.push_back(*spill_candidate); *spill_candidate = interval; @@ -252,8 +252,8 @@ void EmitContext::AllocateRegisters() { const std::array initial_gp_inst_regs = {rcx, rdx, rsi, r8, r9, r10}; const std::array initial_xmm_inst_regs = {xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6}; const std::array initial_gp_temp_regs = {rax, rbx, r12, r13, r14, r15}; - const std::array initial_xmm_temp_regs = {xmm7, xmm7, xmm8, xmm9, xmm10, - xmm11, xmm12, xmm13, xmm14, xmm15}; + const std::array initial_xmm_temp_regs = {xmm7, xmm8, xmm9, xmm10, xmm11, + xmm12, xmm13, xmm14, xmm15}; boost::container::small_vector intervals; FlatInstList insts; @@ -274,10 +274,10 @@ void EmitContext::AllocateRegisters() { std::sort(intervals.begin(), intervals.end(), [](const InstInterval& a, const InstInterval& b) { return a.start < b.start; }); RegAllocContext ctx; - ctx.free_gp_regs.insert(ctx.free_gp_regs.end(), initial_gp_temp_regs.begin(), - initial_gp_temp_regs.end()); - ctx.free_xmm_regs.insert(ctx.free_xmm_regs.end(), initial_xmm_temp_regs.begin(), - initial_xmm_temp_regs.end()); + ctx.free_gp_regs.insert(ctx.free_gp_regs.end(), initial_gp_inst_regs.begin(), + initial_gp_inst_regs.end()); + ctx.free_xmm_regs.insert(ctx.free_xmm_regs.end(), initial_xmm_inst_regs.begin(), + initial_xmm_inst_regs.end()); boost::container::static_vector unused_gp_inst_regs; boost::container::static_vector unused_xmm_inst_regs; unused_gp_inst_regs.insert(unused_gp_inst_regs.end(), ctx.free_gp_regs.begin(), @@ -287,7 +287,7 @@ void EmitContext::AllocateRegisters() { for (const InstInterval& interval : intervals) { // Free old interval resources for (auto it = ctx.active_gp_intervals.begin(); it != ctx.active_gp_intervals.end();) { - if (it->end <= interval.start) { + if (it->end < interval.start) { Reg64 reg = inst_to_operands[it->inst][it->component].getReg().cvt64(); ctx.free_gp_regs.push_back(reg); it = ctx.active_gp_intervals.erase(it); @@ -296,7 +296,7 @@ void EmitContext::AllocateRegisters() { } } for (auto it = ctx.active_xmm_intervals.begin(); it != ctx.active_xmm_intervals.end();) { - if (it->end <= interval.start) { + if (it->end < interval.start) { Xmm reg = inst_to_operands[it->inst][it->component].getReg().cvt128(); ctx.free_xmm_regs.push_back(reg); it = ctx.active_xmm_intervals.erase(it); @@ -306,7 +306,7 @@ void EmitContext::AllocateRegisters() { } for (auto it = ctx.active_spill_intervals.begin(); it != ctx.active_spill_intervals.end();) { - if (it->end <= interval.start) { + if (it->end < interval.start) { const Address& addr = inst_to_operands[it->inst][it->component].getAddress(); ctx.free_stack_slots.push_back(addr.getDisp()); it = ctx.active_spill_intervals.erase(it); @@ -314,15 +314,17 @@ void EmitContext::AllocateRegisters() { ++it; } } - u8 num_components = GetNumComponentsOfType(interval.inst->Type()); - bool is_floating = IsFloatingType(interval.inst->Type()); + u8 num_components = GetNumComponentsOfType(interval.inst); + bool is_floating = IsFloatingType(interval.inst); + auto& operands = inst_to_operands[interval.inst]; + operands.resize(num_components); if (is_floating) { for (size_t i = 0; i < num_components; ++i) { ActiveInstInterval active(interval, i); if (!ctx.free_xmm_regs.empty()) { Xmm& reg = ctx.free_xmm_regs.back(); ctx.free_xmm_regs.pop_back(); - inst_to_operands[active.inst][active.component] = reg; + operands[active.component] = reg; unused_xmm_inst_regs.erase( 
std::remove(unused_xmm_inst_regs.begin(), unused_xmm_inst_regs.end(), reg), unused_xmm_inst_regs.end()); @@ -337,8 +339,7 @@ void EmitContext::AllocateRegisters() { if (!ctx.free_gp_regs.empty()) { Reg64& reg = ctx.free_gp_regs.back(); ctx.free_gp_regs.pop_back(); - inst_to_operands[active.inst][active.component] = - ResizeRegToType(reg, active.inst->Type()); + operands[active.component] = ResizeRegToType(reg, active.inst); unused_gp_inst_regs.erase( std::remove(unused_gp_inst_regs.begin(), unused_gp_inst_regs.end(), reg), unused_gp_inst_regs.end()); @@ -354,10 +355,10 @@ void EmitContext::AllocateRegisters() { unused_xmm_inst_regs.end()); num_scratch_gp_regs = unused_gp_inst_regs.size() + 1; // rax is scratch num_scratch_xmm_regs = unused_xmm_inst_regs.size() + 1; // xmm7 is scratch - temp_gp_regs.insert(temp_gp_regs.end(), initial_gp_inst_regs.begin(), - initial_gp_inst_regs.end()); - temp_xmm_regs.insert(temp_xmm_regs.end(), initial_xmm_inst_regs.begin(), - initial_xmm_inst_regs.end()); + temp_gp_regs.insert(temp_gp_regs.end(), initial_gp_temp_regs.begin(), + initial_gp_temp_regs.end()); + temp_xmm_regs.insert(temp_xmm_regs.end(), initial_xmm_temp_regs.begin(), + initial_xmm_temp_regs.end()); } } // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h index 5c907f0ca..994fc6f6a 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h @@ -54,7 +54,7 @@ public: void PopTempXmmReg(); void ResetTempRegs(); - [[nodiscard]] const Xbyak::Reg64& UserData() const {return Xbyak::util::r11;} + [[nodiscard]] const Xbyak::Reg64& UserData() const {return Xbyak::util::rdi;} [[nodiscard]] const Operands& Def(IR::Inst* inst); [[nodiscard]] Operands Def(const IR::Value& value); diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp index 3d327569b..b93583696 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp @@ -8,65 +8,15 @@ using namespace Xbyak::util; namespace Shader::Backend::X64 { -bool IsFloatingType(IR::Type type) { +bool IsFloatingType(const IR::Value& value) { // We store F16 on general purpose registers since we don't do // arithmetic on them + IR::Type type = value.Type(); return type == IR::Type::F32 || type == IR::Type::F64; } -bool IsConditionalOpcode(IR::Opcode opcode) { - switch (opcode) { - case IR::Opcode::FPOrdEqual32: - case IR::Opcode::FPOrdEqual64: - case IR::Opcode::FPUnordEqual32: - case IR::Opcode::FPUnordEqual64: - case IR::Opcode::FPOrdNotEqual32: - case IR::Opcode::FPOrdNotEqual64: - case IR::Opcode::FPUnordNotEqual32: - case IR::Opcode::FPUnordNotEqual64: - case IR::Opcode::FPOrdLessThan32: - case IR::Opcode::FPOrdLessThan64: - case IR::Opcode::FPUnordLessThan32: - case IR::Opcode::FPUnordLessThan64: - case IR::Opcode::FPOrdGreaterThan32: - case IR::Opcode::FPOrdGreaterThan64: - case IR::Opcode::FPUnordGreaterThan32: - case IR::Opcode::FPUnordGreaterThan64: - case IR::Opcode::FPOrdLessThanEqual32: - case IR::Opcode::FPOrdLessThanEqual64: - case IR::Opcode::FPUnordLessThanEqual32: - case IR::Opcode::FPUnordLessThanEqual64: - case IR::Opcode::FPOrdGreaterThanEqual32: - case IR::Opcode::FPOrdGreaterThanEqual64: - case IR::Opcode::FPUnordGreaterThanEqual32: - case IR::Opcode::FPUnordGreaterThanEqual64: - case IR::Opcode::FPIsNan32: - case 
IR::Opcode::FPIsNan64: - case IR::Opcode::FPIsInf32: - case IR::Opcode::FPIsInf64: - case IR::Opcode::FPCmpClass32: - case IR::Opcode::SLessThan32: - case IR::Opcode::SLessThan64: - case IR::Opcode::ULessThan32: - case IR::Opcode::ULessThan64: - case IR::Opcode::IEqual32: - case IR::Opcode::IEqual64: - case IR::Opcode::SLessThanEqual: - case IR::Opcode::ULessThanEqual: - case IR::Opcode::SGreaterThan: - case IR::Opcode::UGreaterThan: - case IR::Opcode::INotEqual32: - case IR::Opcode::INotEqual64: - case IR::Opcode::SGreaterThanEqual: - case IR::Opcode::UGreaterThanEqual: - return true; - default: - return false; - } -} - -size_t GetRegBytesOfType(IR::Type type) { - switch (type) { +size_t GetRegBytesOfType(const IR::Value& value) { + switch (value.Type()) { case IR::Type::U1: case IR::Type::U8: return 1; @@ -98,12 +48,12 @@ size_t GetRegBytesOfType(IR::Type type) { default: break; } - UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(type)); + UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(value.Type())); return 0; } -u8 GetNumComponentsOfType(IR::Type type) { - switch (type) { +u8 GetNumComponentsOfType(const IR::Value& value) { + switch (value.Type()) { case IR::Type::U1: case IR::Type::U8: case IR::Type::U16: @@ -135,13 +85,13 @@ u8 GetNumComponentsOfType(IR::Type type) { default: break; } - UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(type)); + UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(value.Type())); return 0; } -Reg ResizeRegToType(const Reg& reg, IR::Type type) { +Reg ResizeRegToType(const Reg& reg, const IR::Value& value) { ASSERT(reg.getKind() == Operand::Kind::REG); - switch (GetRegBytesOfType(type)) { + switch (GetRegBytesOfType(value)) { case 1: return reg.cvt8(); case 2: @@ -153,7 +103,7 @@ Reg ResizeRegToType(const Reg& reg, IR::Type type) { default: break; } - UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(type)); + UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(value.Type())); return reg; } @@ -173,7 +123,7 @@ void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& } else if (src.isXMM() && dst.isXMM()) { c.movaps(dst.getReg().cvt128(), src.getReg().cvt128()); } else { - UNREACHABLE_MSG("Unsupported mov float %s %s", src.toString(), dst.toString()); + UNREACHABLE_MSG("Unsupported mov float {} {}", src.toString(), dst.toString()); } } @@ -193,7 +143,7 @@ void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand } else if (src.isXMM() && dst.isXMM()) { c.movapd(dst.getReg().cvt128(), src.getReg().cvt128()); } else { - UNREACHABLE_MSG("Unsupported mov double %s %s", src.toString(), dst.toString()); + UNREACHABLE_MSG("Unsupported mov double {} {}", src.toString(), dst.toString()); } } @@ -202,26 +152,27 @@ void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& sr if (src == dst) { return; } - Reg tmp = (src.isMEM() && dst.isMEM()) ? ctx.TempGPReg(false).changeBit(dst.getBit()) : dst.getReg(); - if (src.getBit() == dst.getBit()) { - c.mov(tmp, src); - } else if (src.getBit() < dst.getBit()) { + Reg tmp = dst.isMEM() ? 
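
Two x86-64 details drive the MovGP logic here: a plain mov cannot move memory to memory, so a spilled destination needs a scratch register, and any write to a 32-bit register implicitly zero-extends into the full 64-bit register, which is why the movzx path is skipped for 32-bit sources. An illustrative plain-C++ model of the widening and narrowing cases:

#include <cstdint>

// `mov eax, edx` already clears the upper 32 bits of rax, so widening a
// 32-bit value needs no movzx; only 8/16-bit sources do.
std::uint64_t WidenU32(std::uint32_t src) {
    return src; // compiles to a plain 32-bit mov; upper bits are zeroed
}

// Narrowing just reads the low bits, which is what resizing the source
// operand to the destination width accomplishes.
std::uint32_t NarrowU64(std::uint64_t src) {
    return static_cast<std::uint32_t>(src);
}
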
ctx.TempGPReg(false).changeBit(dst.getBit()) : dst.getReg(); + if (src.getBit() < dst.getBit() && !src.isBit(32)) { c.movzx(tmp, src); - } else { + } else if (src.getBit() > dst.getBit()) { Operand src_tmp = src; src_tmp.setBit(dst.getBit()); c.mov(tmp, src_tmp); + } else { + c.mov(tmp, src); } - if (src.isMEM() && dst.isMEM()) { + if (dst.isMEM()) { c.mov(dst, tmp); } } void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) { if (!src.IsImmediate()) { - const Operands& src_op = ctx.Def(src); - if (IsFloatingType(src.Type())) { - switch (GetRegBytesOfType(src.Type())) { + IR::Inst* src_inst = src.InstRecursive(); + const Operands& src_op = ctx.Def(src_inst); + if (IsFloatingType(src)) { + switch (GetRegBytesOfType(src)) { case 32: for (size_t i = 0; i < src_op.size(); i++) { MovFloat(ctx, dst[i], src_op[i]); @@ -233,7 +184,7 @@ void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) { } break; default: - UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(src.Type())); + UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(src.Type())); break; } } else { @@ -288,7 +239,7 @@ void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) { c.mov(is_mem ? tmp : dst[0], std::bit_cast(src.Patch())); break; default: - UNREACHABLE_MSG("Unsupported type %s", IR::NameOf(src.Type())); + UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(src.Type())); break; } if (is_mem) { diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.h b/src/shader_recompiler/backend/asm_x64/x64_utils.h index c22dbfc77..1c513234a 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_utils.h +++ b/src/shader_recompiler/backend/asm_x64/x64_utils.h @@ -10,11 +10,10 @@ namespace Shader::Backend::X64 { -bool IsFloatingType(IR::Type type); -bool IsConditionalOpcode(IR::Opcode opcode); -size_t GetRegBytesOfType(IR::Type type); -u8 GetNumComponentsOfType(IR::Type type); -Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, IR::Type type); +bool IsFloatingType(const IR::Value& value); +size_t GetRegBytesOfType(const IR::Value& value); +u8 GetNumComponentsOfType(const IR::Value& value); +Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, const IR::Value& value); void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src); void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src); void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src); @@ -22,4 +21,20 @@ void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src); void EmitInlineF16ToF32(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src); void EmitInlineF32ToF16(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src); +inline bool IsFloatingType(IR::Inst* inst) { + return IsFloatingType(IR::Value(inst)); +} + +inline size_t GetRegBytesOfType(IR::Inst* inst) { + return GetRegBytesOfType(IR::Value(inst)); +} + +inline u8 GetNumComponentsOfType(IR::Inst* inst) { + return GetNumComponentsOfType(IR::Value(inst)); +} + +inline Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, IR::Inst* inst) { + return ResizeRegToType(reg, IR::Value(inst)); +} + } // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index 307e72b45..8e90ea28d 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ 
b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
@@ -214,7 +214,8 @@ static void GenerateSrtProgram(IR::Program& program, PassInfo& pass_info, Pools&
     ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw);

     if (!pass_info.all_readconsts.empty()) {
-        GenerateSrtReadConstsSubProgram(program, pass_info, pools);
+        IR::Program sub_program = GenerateSrtReadConstsSubProgram(program, pass_info, pools);
+        Backend::X64::EmitX64(sub_program, c);
     }

     info.srt_info.flattened_bufsize_dw = pass_info.dst_off_dw;

From c47ef83cc00de90777e32dff9d1e654df2e67429 Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Mon, 7 Apr 2025 21:18:30 +0200
Subject: [PATCH 44/49] Operand holder / fix memory corruption

---
 .../backend/asm_x64/emit_x64.cpp              |   8 +-
 .../asm_x64/emit_x64_bitwise_conversion.cpp   |  46 +--
 .../backend/asm_x64/emit_x64_composite.cpp    |   2 +-
 .../asm_x64/emit_x64_context_get_set.cpp      |   8 +-
 .../backend/asm_x64/emit_x64_convert.cpp      | 100 ++---
 .../asm_x64/emit_x64_floating_point.cpp       | 366 +++++++++---------
 .../backend/asm_x64/emit_x64_instructions.h   |   5 +-
 .../backend/asm_x64/emit_x64_integer.cpp      | 272 +++++++------
 .../backend/asm_x64/emit_x64_logical.cpp      |  18 +-
 .../backend/asm_x64/emit_x64_select.cpp       |   6 +-
 .../backend/asm_x64/emit_x64_special.cpp      |   2 +-
 .../backend/asm_x64/x64_emit_context.cpp      |  26 +-
 .../backend/asm_x64/x64_emit_context.h        |  73 +++-
 .../backend/asm_x64/x64_utils.cpp             | 112 +++---
 .../backend/asm_x64/x64_utils.h               |   6 +-
 15 files changed, 557 insertions(+), 493 deletions(-)

diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
index 254b69ffe..6ac901991 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp
@@ -22,12 +22,12 @@ static void EmitCondition(EmitContext& ctx, const IR::Inst* ref, Label& label, b
             ctx.Code().jmp(label);
         }
     } else {
-        const Operand& op = ctx.Def(cond.InstRecursive())[0];
-        if (op.isREG()) {
-            Reg8 reg = op.getReg().cvt8();
+        const OperandHolder& op = ctx.Def(cond.InstRecursive())[0];
+        if (op.IsReg()) {
+            Reg8 reg = op.Reg().cvt8();
             ctx.Code().test(reg, reg);
         } else {
-            ctx.Code().test(op, 0xff);
+            ctx.Code().test(op.Mem(), 0xff);
         }
         if (invert) {
             ctx.Code().jz(label);
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
index 0a4ecc96b..751a78475 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp
@@ -16,23 +16,23 @@ void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& s
 }

 void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    if (src[0].isMEM()) {
+    if (src[0].IsMem()) {
         MovGP(ctx, dest[0], src[0]);
-    } else if (dest[0].isMEM()) {
-        ctx.Code().movd(dest[0].getAddress(), src[0].getReg().cvt128());
+    } else if (dest[0].IsMem()) {
+        ctx.Code().movd(dest[0].Mem(), src[0].Xmm());
     } else {
-        ctx.Code().movd(dword[rsp - 4], src[0].getReg().cvt128());
+        ctx.Code().movd(dword[rsp - 4], src[0].Xmm());
         MovGP(ctx, dest[0], dword[rsp - 4]);
     }
 }

 void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    if (src[0].isMEM()) {
+    if (src[0].IsMem()) {
         MovGP(ctx, dest[0], src[0]);
-    } else if (dest[0].isMEM()) {
-        ctx.Code().movq(dest[0].getAddress(), src[0].getReg().cvt128());
+    } else if (dest[0].IsMem()) {
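
The OperandHolder this commit introduces is defined in x64_emit_context.h, which this excerpt does not show; the sketch below is an assumed shape inferred from the call sites (IsReg/IsMem/Reg/Xmm/Mem/Op), not the committed definition. The likely corruption source it removes: Xbyak::Address carries more state than its Xbyak::Operand base, so storing operands by base value can slice them; keeping the concrete type in a tagged union avoids that.

#include <variant>
#include <xbyak/xbyak.h>

class OperandHolder { // illustrative sketch only
public:
    OperandHolder(const Xbyak::Reg& reg) : storage{reg} {}
    OperandHolder(const Xbyak::Address& mem) : storage{mem} {}

    bool IsReg() const { return std::holds_alternative<Xbyak::Reg>(storage); }
    bool IsMem() const { return std::holds_alternative<Xbyak::Address>(storage); }

    Xbyak::Reg Reg() const { return std::get<Xbyak::Reg>(storage); }
    Xbyak::Xmm Xmm() const { return Xbyak::Xmm(Reg().getIdx()); }
    Xbyak::Address Mem() const { return std::get<Xbyak::Address>(storage); }

    // Generic view for emitters that accept either register or memory.
    const Xbyak::Operand& Op() const {
        if (IsReg()) {
            return std::get<Xbyak::Reg>(storage);
        }
        return std::get<Xbyak::Address>(storage);
    }

private:
    std::variant<Xbyak::Reg, Xbyak::Address> storage;
};
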
+        ctx.Code().movq(dest[0].Mem(), src[0].Xmm());
     } else {
-        ctx.Code().movq(qword[rsp - 8], src[0].getReg().cvt128());
+        ctx.Code().movq(qword[rsp - 8], src[0].Xmm());
         MovGP(ctx, dest[0], qword[rsp - 8]);
     }
 }
@@ -42,40 +42,40 @@ void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& s
 }

 void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    if (dest[0].isMEM()) {
+    if (dest[0].IsMem()) {
         MovGP(ctx, dest[0], src[0]);
-    } else if (src[0].isMEM()) {
-        ctx.Code().movd(dest[0].getReg().cvt128(), src[0].getAddress());
+    } else if (src[0].IsMem()) {
+        ctx.Code().movd(dest[0].Xmm(), src[0].Mem());
     } else {
         MovGP(ctx, dword[rsp - 4], src[0]);
-        ctx.Code().movd(dest[0].getReg().cvt128(), dword[rsp - 4]);
+        ctx.Code().movd(dest[0].Xmm(), dword[rsp - 4]);
     }
 }

 void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    if (dest[0].isMEM()) {
+    if (dest[0].IsMem()) {
         MovGP(ctx, dest[0], src[0]);
-    } else if (src[0].isMEM()) {
-        ctx.Code().movq(dest[0].getReg().cvt128(), src[0].getAddress());
+    } else if (src[0].IsMem()) {
+        ctx.Code().movq(dest[0].Xmm(), src[0].Mem());
     } else {
         MovGP(ctx, qword[rsp - 8], src[0]);
-        ctx.Code().mov(dest[0].getReg().cvt128(), qword[rsp - 8]);
+        ctx.Code().movq(dest[0].Xmm(), qword[rsp - 8]);
     }
 }

 void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    const bool is_mem = dest[0].isMEM() && (src[0].isMEM() || src[1].isMEM());
-    Reg tmp = is_mem ? ctx.TempGPReg() : dest[0].getReg();
+    const bool is_mem = dest[0].IsMem() && (src[0].IsMem() || src[1].IsMem());
+    Reg tmp = is_mem ? ctx.TempGPReg() : dest[0].Reg();
     MovGP(ctx, tmp, src[1]);
     ctx.Code().shl(tmp, 32);
-    ctx.Code().or_(tmp, src[0]);
+    ctx.Code().or_(tmp, src[0].Op());
     MovGP(ctx, dest[0], tmp);
 }

 void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    Reg src0 = src[0].isMEM() ? ctx.TempGPReg() : src[0].getReg();
+    Reg src0 = src[0].IsMem() ? ctx.TempGPReg() : src[0].Reg();
     MovGP(ctx, src0, src[0]);
-    Reg dest1 = dest[1].isMEM() ? ctx.TempGPReg() : dest[1].getReg().changeBit(64);
+    Reg dest1 = dest[1].IsMem() ? ctx.TempGPReg() : dest[1].Reg().changeBit(64);
     MovGP(ctx, dest1, src0);
     ctx.Code().shr(dest1, 32);
     MovGP(ctx, dest[1], dest1);
@@ -83,9 +83,9 @@
 }

 void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
+    Xmm tmp = dest[0].IsMem() ?
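
What these bitcast emitters compute is std::bit_cast: movd/movq move raw bits between an XMM register and a GP register or memory without conversion. The dword[rsp - 4]/qword[rsp - 8] spills assume scratch space below rsp is safe to touch; the System V x86-64 ABI guarantees a 128-byte red zone there, while Windows does not, which is worth keeping in mind. The C++ equivalent:

#include <bit>
#include <cstdint>

std::uint32_t BitCastU32F32(float value) {
    return std::bit_cast<std::uint32_t>(value); // same 32 bits, new type
}

std::uint64_t BitCastU64F64(double value) {
    return std::bit_cast<std::uint64_t>(value); // same 64 bits, new type
}
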
ctx.TempXmmReg() : dest[0].Xmm(); MovFloat(ctx, tmp, src[0]); - ctx.Code().pinsrd(tmp, src[1], 1); + ctx.Code().pinsrd(tmp, src[1].Op(), 1); MovFloat(ctx, dest[0], tmp); } diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp index 2421553bd..1c7dd4730 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp @@ -12,7 +12,7 @@ using namespace Xbyak::util; namespace { template -static const Operand& GetSuffleOperand(const Operands& comp1, const Operands& comp2, u32 index) { +static const OperandHolder& GetSuffleOperand(const Operands& comp1, const Operands& comp2, u32 index) { if (index < N) { return comp1[index]; } else { diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp index 1eea0e7ee..f097d68ae 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp @@ -61,11 +61,11 @@ void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, Reg& tmp = ctx.TempGPReg(); MovGP(ctx, tmp, base[1]); ctx.Code().shl(tmp, 32); - ctx.Code().or_(tmp, base[0]); - if (offset[0].isMEM()) { - ctx.Code().add(tmp, offset[0]); + ctx.Code().or_(tmp, base[0].Op()); + if (offset[0].IsMem()) { + ctx.Code().add(tmp, offset[0].Mem()); } else { - ctx.Code().lea(tmp, ptr[tmp + offset[0].getReg().cvt64()]); + ctx.Code().lea(tmp, ptr[tmp + offset[0].Reg().cvt64()]); } MovGP(ctx, dest[0], dword[tmp]); } diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp index 48ebf4fa5..69fc004d9 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_convert.cpp @@ -11,64 +11,64 @@ using namespace Xbyak::util; void EmitConvertS16F16(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp_xmm = ctx.TempXmmReg(); - Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); - EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); + Reg tmp_reg = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32(); + EmitInlineF16ToF32(ctx, tmp_xmm, src[0].Op()); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); ctx.Code().and_(tmp_reg, 0xFFFF); MovGP(ctx, dest[0], tmp_reg); } void EmitConvertS16F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); - ctx.Code().cvttss2si(tmp, src[0]); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32(); + ctx.Code().cvttss2si(tmp, src[0].Op()); ctx.Code().and_(tmp, 0xFFFF); MovGP(ctx, dest[0], tmp); } void EmitConvertS16F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); - ctx.Code().cvttsd2si(tmp, src[0]); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32(); + ctx.Code().cvttsd2si(tmp, src[0].Op()); ctx.Code().and_(tmp, 0xFFFF); MovGP(ctx, dest[0], tmp); } void EmitConvertS32F16(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp_xmm = ctx.TempXmmReg(); - Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); - EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); + Reg tmp_reg = dest[0].IsMem() ? 
ctx.TempGPReg().cvt32() : dest[0].Reg(); + EmitInlineF16ToF32(ctx, tmp_xmm, src[0].Op()); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); MovGP(ctx, dest[0], tmp_reg); } void EmitConvertS32F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); - ctx.Code().cvttss2si(tmp, src[0]); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg(); + ctx.Code().cvttss2si(tmp, src[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitConvertS32F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); - ctx.Code().cvttsd2si(tmp, src[0]); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg(); + ctx.Code().cvttsd2si(tmp, src[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitConvertS64F16(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp_xmm = ctx.TempXmmReg(); - Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg(); - EmitInlineF16ToF32(ctx, tmp_xmm, src[0]); + Reg tmp_reg = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg(); + EmitInlineF16ToF32(ctx, tmp_xmm, src[0].Op()); ctx.Code().cvttss2si(tmp_reg, tmp_xmm); MovGP(ctx, dest[0], tmp_reg); } void EmitConvertS64F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg(); - ctx.Code().cvttss2si(tmp, src[0]); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg(); + ctx.Code().cvttss2si(tmp, src[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitConvertS64F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg() : dest[0].getReg(); - ctx.Code().cvttsd2si(tmp, src[0]); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg(); + ctx.Code().cvttsd2si(tmp, src[0].Op()); MovGP(ctx, dest[0], tmp); } @@ -117,51 +117,51 @@ void EmitConvertU32U64(EmitContext& ctx, const Operands& dest, const Operands& s } void EmitConvertF16F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - EmitInlineF32ToF16(ctx, dest[0], src[0]); + EmitInlineF32ToF16(ctx, dest[0].Op(), src[0].Op()); } void EmitConvertF32F16(EmitContext& ctx, const Operands& dest, const Operands& src) { - EmitInlineF16ToF32(ctx, dest[0], src[0]); + EmitInlineF16ToF32(ctx, dest[0].Op(), src[0].Op()); } void EmitConvertF32F64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().cvtsd2ss(tmp, src[0]); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + ctx.Code().cvtsd2ss(tmp, src[0].Op()); MovFloat(ctx, dest[0], tmp); } void EmitConvertF64F32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().cvtss2sd(tmp, src[0]); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + ctx.Code().cvtss2sd(tmp, src[0].Op()); MovDouble(ctx, dest[0], tmp); } void EmitConvertF16S8(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); + Reg tmp_reg = dest[0].IsMem() ? 
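
All of the cvttss2si/cvttsd2si paths above convert with truncation toward zero (the direction a C-style cast uses); out-of-range inputs and NaN produce the x86 "integer indefinite" value 0x80000000 rather than undefined behavior. The 16-bit variants then mask the 32-bit result down to its low half. In C++ terms, for in-range inputs:

#include <cstdint>

std::uint32_t ConvertS16F32(float value) {
    const auto truncated = static_cast<std::int32_t>(value); // cvttss2si
    return static_cast<std::uint32_t>(truncated) & 0xFFFF;   // and_ tmp, 0xFFFF
}
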
ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32(); Xmm tmp_xmm = ctx.TempXmmReg(); - ctx.Code().movsx(tmp_reg, src[0]); + MovGP(ctx, tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); - EmitInlineF32ToF16(ctx, dest[0], tmp_xmm); + EmitInlineF32ToF16(ctx, dest[0].Op(), tmp_xmm); } void EmitConvertF16S16(EmitContext& ctx, const Operands& dest, const Operands& src) { - Reg tmp_reg = dest[0].isMEM() ? ctx.TempGPReg().cvt32() : dest[0].getReg().cvt32(); + Reg tmp_reg = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg().cvt32(); Xmm tmp_xmm = ctx.TempXmmReg(); - ctx.Code().movsx(tmp_reg, src[0]); + MovGP(ctx, tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); - EmitInlineF32ToF16(ctx, dest[0], tmp_xmm); + EmitInlineF32ToF16(ctx, dest[0].Op(), tmp_xmm); } void EmitConvertF16S32(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = ctx.TempXmmReg(); - ctx.Code().cvtsi2ss(tmp, src[0]); - EmitInlineF32ToF16(ctx, dest[0], tmp); + ctx.Code().cvtsi2ss(tmp, src[0].Op()); + EmitInlineF32ToF16(ctx, dest[0].Op(), tmp); } void EmitConvertF16S64(EmitContext& ctx, const Operands& dest, const Operands& src) { Xmm tmp = ctx.TempXmmReg(); - ctx.Code().cvtsi2ss(tmp, src[0]); - EmitInlineF32ToF16(ctx, dest[0], tmp); + ctx.Code().cvtsi2ss(tmp, src[0].Op()); + EmitInlineF32ToF16(ctx, dest[0].Op(), tmp); } void EmitConvertF16U8(EmitContext& ctx, const Operands& dest, const Operands& src) { @@ -182,29 +182,29 @@ void EmitConvertF16U64(EmitContext& ctx, const Operands& dest, const Operands& s void EmitConvertF32S8(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp_reg = ctx.TempGPReg().cvt32(); - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().movsx(tmp_reg, src[0]); + Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + MovGP(ctx, tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); MovFloat(ctx, dest[0], tmp_xmm); } void EmitConvertF32S16(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp_reg = ctx.TempGPReg().cvt32(); - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().movsx(tmp_reg, src[0]); + Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + MovGP(ctx, tmp_reg, src[0]); ctx.Code().cvtsi2ss(tmp_xmm, tmp_reg); MovFloat(ctx, dest[0], tmp_xmm); } void EmitConvertF32S32(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().cvtsi2ss(tmp, src[0]); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + ctx.Code().cvtsi2ss(tmp, src[0].Op()); MovFloat(ctx, dest[0], tmp); } void EmitConvertF32S64(EmitContext& ctx, const Operands& dest, const Operands& src) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().cvtsi2ss(tmp, src[0]); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + ctx.Code().cvtsi2ss(tmp, src[0].Op()); MovFloat(ctx, dest[0], tmp); } @@ -226,29 +226,29 @@ void EmitConvertF32U64(EmitContext& ctx, const Operands& dest, const Operands& s void EmitConvertF64S8(EmitContext& ctx, const Operands& dest, const Operands& src) { Reg tmp_reg = ctx.TempGPReg().cvt32(); - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().movsx(tmp_reg, src[0]); + Xmm tmp_xmm = dest[0].IsMem() ? 
ctx.TempXmmReg() : dest[0].Xmm();
+    MovGP(ctx, tmp_reg, src[0]);
     ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
     MovDouble(ctx, dest[0], tmp_xmm);
 }

 void EmitConvertF64S16(EmitContext& ctx, const Operands& dest, const Operands& src) {
     Reg tmp_reg = ctx.TempGPReg().cvt32();
-    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
-    ctx.Code().movsx(tmp_reg, src[0]);
+    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
+    MovGP(ctx, tmp_reg, src[0]);
     ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
     MovDouble(ctx, dest[0], tmp_xmm);
 }

 void EmitConvertF64S32(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
-    ctx.Code().cvtsi2sd(tmp, src[0]);
+    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
+    ctx.Code().cvtsi2sd(tmp, src[0].Op());
     MovDouble(ctx, dest[0], tmp);
 }

 void EmitConvertF64S64(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
-    ctx.Code().cvtsi2sd(tmp, src[0]);
+    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
+    ctx.Code().cvtsi2sd(tmp, src[0].Op());
     MovDouble(ctx, dest[0], tmp);
 }
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp
index 588b1ed2d..2a048dbcb 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp
@@ -13,57 +13,55 @@ using namespace Xbyak::util;

 void EmitFPAbs16(EmitContext& ctx, const Operands& dest, const Operands& src) {
-    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt16() : dest[0].getReg().cvt16();
-    MovGP(ctx, tmp, src[0]);
-    ctx.Code().and_(tmp, 0x7FFF);
-    MovGP(ctx, dest[0], tmp);
+    MovGP(ctx, dest[0], src[0]);
+    ctx.Code().and_(dest[0].Op(), 0x7FFF);
 }

 void EmitFPAbs32(EmitContext& ctx, const Operands& dest, const Operands& src) {
     Reg reg_tmp = ctx.TempGPReg();
-    Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
+    Xmm xmm_tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
     ctx.Code().mov(reg_tmp, 0x7FFFFFFF);
     ctx.Code().movd(xmm_tmp, reg_tmp);
-    ctx.Code().andps(xmm_tmp, src[0]);
+    ctx.Code().andps(xmm_tmp, src[0].Op());
     MovFloat(ctx, dest[0], xmm_tmp);
 }

 void EmitFPAbs64(EmitContext& ctx, const Operands& dest, const Operands& src) {
     Reg reg_tmp = ctx.TempGPReg();
-    Xmm xmm_tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
+    Xmm xmm_tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
     ctx.Code().mov(reg_tmp, 0x7FFFFFFFFFFFFFFF);
     ctx.Code().movq(xmm_tmp, reg_tmp);
-    ctx.Code().andpd(xmm_tmp, src[0]);
+    ctx.Code().andpd(xmm_tmp, src[0].Op());
     MovFloat(ctx, dest[0], xmm_tmp);
 }

 void EmitFPAdd16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
     Xmm tmp1 = ctx.TempXmmReg();
     Xmm tmp2 = ctx.TempXmmReg();
-    EmitInlineF16ToF32(ctx, tmp1, op1[0]);
-    EmitInlineF16ToF32(ctx, tmp2, op2[0]);
+    EmitInlineF16ToF32(ctx, tmp1, op1[0].Op());
+    EmitInlineF16ToF32(ctx, tmp2, op2[0].Op());
     ctx.Code().addss(tmp1, tmp2);
-    EmitInlineF32ToF16(ctx, dest[0], tmp1);
+    EmitInlineF32ToF16(ctx, dest[0].Op(), tmp1);
 }

 void EmitFPAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
-    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
+    Xmm tmp = dest[0].IsMem() ?
ctx.TempXmmReg() : dest[0].Xmm(); MovFloat(ctx, tmp, op1[0]); - ctx.Code().addss(tmp, op2[0]); + ctx.Code().addss(tmp, op2[0].Op()); MovFloat(ctx, dest[0], tmp); } void EmitFPAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); MovDouble(ctx, tmp, op1[0]); - ctx.Code().addsd(tmp, op2[0]); + ctx.Code().addsd(tmp, op2[0].Op()); MovDouble(ctx, dest[0], tmp); } void EmitFPSub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); MovFloat(ctx, tmp, op1[0]); - ctx.Code().subss(tmp, op2[0]); + ctx.Code().subss(tmp, op2[0].Op()); MovFloat(ctx, dest[0], tmp); } @@ -71,58 +69,54 @@ void EmitFPFma16(EmitContext& ctx, const Operands& dest, const Operands& op1, co Xmm tmp1 = ctx.TempXmmReg(); Xmm tmp2 = ctx.TempXmmReg(); Xmm tmp3 = ctx.TempXmmReg(); - EmitInlineF16ToF32(ctx, tmp1, op1[0]); - EmitInlineF16ToF32(ctx, tmp2, op2[0]); - EmitInlineF16ToF32(ctx, tmp3, op3[0]); + EmitInlineF16ToF32(ctx, tmp1, op1[0].Op()); + EmitInlineF16ToF32(ctx, tmp2, op2[0].Op()); + EmitInlineF16ToF32(ctx, tmp3, op3[0].Op()); ctx.Code().vfmadd132ss(tmp3, tmp1, tmp2); - EmitInlineF32ToF16(ctx, dest[0], tmp3); + EmitInlineF32ToF16(ctx, dest[0].Op(), tmp3); } void EmitFPFma32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) { - Xmm tmp1 = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - Xmm tmp2 = op1[0].isMEM() ? ctx.TempXmmReg() : op1[0].getReg().cvt128(); - Xmm tmp3 = op2[0].isMEM() ? ctx.TempXmmReg() : op2[0].getReg().cvt128(); + Xmm tmp1 = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + Xmm tmp2 = op2[0].IsMem() ? ctx.TempXmmReg() : op2[0].Xmm(); MovFloat(ctx, tmp1, op3[0]); - MovFloat(ctx, tmp2, op1[0]); - MovFloat(ctx, tmp3, op2[0]); - ctx.Code().vfmadd132ss(tmp3, tmp1, tmp2); - MovFloat(ctx, dest[0], tmp3); + MovFloat(ctx, tmp2, op2[0]); + ctx.Code().vfmadd132ss(tmp2, tmp1, op1[0].Op()); + MovFloat(ctx, dest[0], tmp2); } void EmitFPFma64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) { - Xmm tmp1 = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - Xmm tmp2 = op1[0].isMEM() ? ctx.TempXmmReg() : op1[0].getReg().cvt128(); - Xmm tmp3 = op2[0].isMEM() ? ctx.TempXmmReg() : op2[0].getReg().cvt128(); + Xmm tmp1 = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + Xmm tmp2 = op2[0].IsMem() ? 
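
vfmadd132ss computes dst = dst * src3 + src2; the digits in the mnemonic name the roles (operand 1 times operand 3, plus operand 2). With tmp2 holding op2 and tmp1 holding op3, the call above therefore produces op1 * op2 + op3 as a single-rounding fused multiply-add, which in C++ is:

#include <cmath>

float Fma32(float op1, float op2, float op3) {
    return std::fma(op1, op2, op3); // one rounding step, like vfmadd132ss
}
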
ctx.TempXmmReg() : op2[0].Xmm(); MovDouble(ctx, tmp1, op3[0]); - MovDouble(ctx, tmp2, op1[0]); - MovDouble(ctx, tmp3, op2[0]); - ctx.Code().vfmadd132sd(tmp3, tmp1, tmp2); - MovDouble(ctx, dest[0], tmp3); + MovDouble(ctx, tmp2, op2[0]); + ctx.Code().vfmadd132sd(tmp2, tmp1, op1[0].Op()); + MovDouble(ctx, dest[0], tmp2); } void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy) { if (is_legacy) { Xmm tmp1 = ctx.TempXmmReg(); Xmm tmp2 = ctx.TempXmmReg(); - MovFloat(ctx, tmp1, op1[0]); - MovFloat(ctx, tmp2, op1[0]); - ctx.Code().maxss(tmp2, op2[0]); + MovFloat(ctx, tmp1, op1[0].Op()); + MovFloat(ctx, tmp2, op1[0].Op()); + ctx.Code().maxss(tmp2, op2[0].Op()); ctx.Code().cmpunordss(tmp1, tmp1); - ctx.Code().andps(tmp1, op2[0]); + ctx.Code().andps(tmp1, op2[0].Op()); ctx.Code().orps(tmp2, tmp1); MovFloat(ctx, dest[0], tmp2); } else { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); MovFloat(ctx, tmp, op1[0]); - ctx.Code().maxss(tmp, op2[0]); + ctx.Code().maxss(tmp, op2[0].Op()); MovFloat(ctx, dest[0], tmp); } } void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); MovDouble(ctx, tmp, op1[0]); - ctx.Code().maxsd(tmp, op2[0]); + ctx.Code().maxsd(tmp, op2[0].Op()); MovDouble(ctx, dest[0], tmp); } @@ -130,87 +124,85 @@ void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, co if (is_legacy) { Xmm tmp1 = ctx.TempXmmReg(); Xmm tmp2 = ctx.TempXmmReg(); - MovFloat(ctx, tmp1, op1[0]); - MovFloat(ctx, tmp2, op1[0]); - ctx.Code().minss(tmp2, op2[0]); + MovFloat(ctx, tmp1, op1[0].Op()); + MovFloat(ctx, tmp2, op1[0].Op()); + ctx.Code().minss(tmp2, op2[0].Op()); ctx.Code().cmpunordss(tmp1, tmp1); - ctx.Code().andps(tmp1, op2[0]); + ctx.Code().andps(tmp1, op2[0].Op()); ctx.Code().orps(tmp2, tmp1); MovFloat(ctx, dest[0], tmp2); } else { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); MovFloat(ctx, tmp, op1[0]); - ctx.Code().minss(tmp, op2[0]); + ctx.Code().minss(tmp, op2[0].Op()); MovFloat(ctx, dest[0], tmp); } } void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); MovDouble(ctx, tmp, op1[0]); - ctx.Code().minsd(tmp, op2[0]); + ctx.Code().minsd(tmp, op2[0].Op()); MovDouble(ctx, dest[0], tmp); } void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { Xmm tmp1 = ctx.TempXmmReg(); Xmm tmp2 = ctx.TempXmmReg(); - EmitInlineF16ToF32(ctx, tmp1, op1[0]); - EmitInlineF16ToF32(ctx, tmp2, op2[0]); + EmitInlineF16ToF32(ctx, tmp1, op1[0].Op()); + EmitInlineF16ToF32(ctx, tmp2, op2[0].Op()); ctx.Code().mulss(tmp1, tmp2); - EmitInlineF32ToF16(ctx, dest[0], tmp1); + EmitInlineF32ToF16(ctx, dest[0].Op(), tmp1); } void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].IsMem() ? 
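
The is_legacy sequences above rely on maxss/minss returning their second operand whenever the comparison is unordered; the extra cmpunordss/andps/orps triple then substitutes op2 when op1 is NaN, so a NaN in the first operand selects the second operand instead of propagating. A plain-C++ model of the emitted max sequence (a sketch, not the emitter itself):

#include <cmath>

float LegacyMax32(float op1, float op2) {
    // maxss(t, op2) with t = op1: returns op2 when either input is NaN.
    float result = (op1 > op2) ? op1 : op2;
    // cmpunordss(mask, mask) builds an all-ones mask when op1 is NaN;
    // andps/orps then merge op2 over the result under that mask.
    if (std::isnan(op1)) {
        result = op2;
    }
    return result; // note: LegacyMax32(x, NaN) still yields NaN
}
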
ctx.TempXmmReg() : dest[0].Xmm();
     MovFloat(ctx, tmp, op1[0]);
-    ctx.Code().mulss(tmp, op2[0]);
+    ctx.Code().mulss(tmp, op2[0].Op());
     MovFloat(ctx, dest[0], tmp);
 }

 void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
-    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
+    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
     MovDouble(ctx, tmp, op1[0]);
-    ctx.Code().mulsd(tmp, op2[0]);
+    ctx.Code().mulsd(tmp, op2[0].Op());
     MovDouble(ctx, dest[0], tmp);
 }

 void EmitFPDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
-    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
+    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
     MovFloat(ctx, tmp, op1[0]);
-    ctx.Code().divss(tmp, op2[0]);
+    ctx.Code().divss(tmp, op2[0].Op());
     MovFloat(ctx, dest[0], tmp);
 }

 void EmitFPDiv64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
-    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
+    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
     MovDouble(ctx, tmp, op1[0]);
-    ctx.Code().divsd(tmp, op2[0]);
+    ctx.Code().divsd(tmp, op2[0].Op());
     MovDouble(ctx, dest[0], tmp);
 }

 void EmitFPNeg16(EmitContext& ctx, const Operands& dest, const Operands& op1) {
-    Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt16() : dest[0].getReg().cvt16();
-    MovGP(ctx, tmp, op1[0]);
-    ctx.Code().xor_(tmp, 0x8000);
-    MovGP(ctx, dest[0], tmp);
+    MovGP(ctx, dest[0], op1[0]);
+    ctx.Code().xor_(dest[0].Op(), 0x8000);
 }

 void EmitFPNeg32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
-    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
+    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
     Reg tmp_reg = ctx.TempGPReg().cvt32();
     ctx.Code().mov(tmp_reg, 0x80000000);
     ctx.Code().movd(tmp_xmm, tmp_reg);
-    ctx.Code().xorps(tmp_xmm, op1[0]);
+    ctx.Code().xorps(tmp_xmm, op1[0].Op());
     MovFloat(ctx, dest[0], tmp_xmm);
 }

 void EmitFPNeg64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
-    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
+    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
     Reg tmp_reg = ctx.TempGPReg();
     ctx.Code().mov(tmp_reg, 0x8000000000000000);
     ctx.Code().movq(tmp_xmm, tmp_reg);
-    ctx.Code().xorpd(tmp_xmm, op1[0]);
+    ctx.Code().xorpd(tmp_xmm, op1[0].Op());
     MovDouble(ctx, dest[0], tmp_xmm);
 }
@@ -236,39 +228,39 @@ void EmitFPLog2(EmitContext& ctx) {
 }

 void EmitFPRecip32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
-    Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
-    ctx.Code().rcpss(tmp, op1[0]);
+    Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
+    ctx.Code().rcpss(tmp, op1[0].Op());
     MovFloat(ctx, dest[0], tmp);
 }

 void EmitFPRecip64(EmitContext& ctx, const Operands& dest, const Operands& op1) {
-    Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128();
+    Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm();
     Reg tmp_reg = ctx.TempGPReg();
     ctx.Code().mov(tmp_reg, 1);
     ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg);
-    ctx.Code().divsd(tmp_xmm, op1[0]);
+    ctx.Code().divsd(tmp_xmm, op1[0].Op());
     MovDouble(ctx, dest[0], tmp_xmm);
 }

 void EmitFPRecipSqrt32(EmitContext& ctx, const Operands& dest, const Operands& op1) {
-    Xmm tmp = dest[0].isMEM() ?
ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().rsqrtss(tmp, op1[0]); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + ctx.Code().rsqrtss(tmp, op1[0].Op()); MovFloat(ctx, dest[0], tmp); } void EmitFPRecipSqrt64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp_xmm = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp_xmm = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); Reg tmp_reg = ctx.TempGPReg(); ctx.Code().mov(tmp_reg, 1); ctx.Code().cvtsi2sd(tmp_xmm, tmp_reg); - ctx.Code().divsd(tmp_xmm, op1[0]); + ctx.Code().divsd(tmp_xmm, op1[0].Op()); ctx.Code().sqrtsd(tmp_xmm, tmp_xmm); MovDouble(ctx, dest[0], tmp_xmm); } void EmitFPSqrt(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().sqrtss(tmp, op1[0]); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + ctx.Code().sqrtss(tmp, op1[0].Op()); MovFloat(ctx, dest[0], tmp); } @@ -288,84 +280,84 @@ void EmitFPClamp16(EmitContext& ctx, const Operands& dest, const Operands& op, c Xmm tmp1 = ctx.TempXmmReg(); Xmm tmp2 = ctx.TempXmmReg(); Xmm tmp3 = ctx.TempXmmReg(); - EmitInlineF16ToF32(ctx, tmp1, op[0]); - EmitInlineF16ToF32(ctx, tmp2, min[0]); - EmitInlineF16ToF32(ctx, tmp3, max[0]); + EmitInlineF16ToF32(ctx, tmp1, op[0].Op()); + EmitInlineF16ToF32(ctx, tmp2, min[0].Op()); + EmitInlineF16ToF32(ctx, tmp3, max[0].Op()); ctx.Code().maxss(tmp1, tmp2); ctx.Code().minss(tmp1, tmp3); - EmitInlineF32ToF16(ctx, dest[0], tmp1); + EmitInlineF32ToF16(ctx, dest[0].Op(), tmp1); } void EmitFPClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); MovFloat(ctx, tmp, op[0]); - ctx.Code().maxss(tmp, min[0]); - ctx.Code().minss(tmp, max[0]); + ctx.Code().maxss(tmp, min[0].Op()); + ctx.Code().minss(tmp, max[0].Op()); MovFloat(ctx, dest[0], tmp); } void EmitFPClamp64(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); MovDouble(ctx, tmp, op[0]); - ctx.Code().maxsd(tmp, min[0]); - ctx.Code().minsd(tmp, max[0]); + ctx.Code().maxsd(tmp, min[0].Op()); + ctx.Code().minsd(tmp, max[0].Op()); MovDouble(ctx, dest[0], tmp); } void EmitFPRoundEven16(EmitContext& ctx, const Operands& dest, const Operands& op1) { Xmm tmp = ctx.TempXmmReg(); - EmitInlineF16ToF32(ctx, tmp, op1[0]); + EmitInlineF16ToF32(ctx, tmp, op1[0].Op()); ctx.Code().roundss(tmp, tmp, 0x00); - EmitInlineF32ToF16(ctx, dest[0], tmp); + EmitInlineF32ToF16(ctx, dest[0].Op(), tmp); } void EmitFPRoundEven32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().roundss(tmp, op1[0], 0x00); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + ctx.Code().roundss(tmp, op1[0].Op(), 0x00); MovFloat(ctx, dest[0], tmp); } void EmitFPRoundEven64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().roundsd(tmp, op1[0], 0x00); + Xmm tmp = dest[0].IsMem() ? 
ctx.TempXmmReg() : dest[0].Xmm(); + ctx.Code().roundsd(tmp, op1[0].Op(), 0x00); MovDouble(ctx, dest[0], tmp); } void EmitFPFloor16(EmitContext& ctx, const Operands& dest, const Operands& op1) { Xmm tmp = ctx.TempXmmReg(); - EmitInlineF16ToF32(ctx, tmp, op1[0]); + EmitInlineF16ToF32(ctx, tmp, op1[0].Op()); ctx.Code().roundss(tmp, tmp, 0x01); - EmitInlineF32ToF16(ctx, dest[0], tmp); + EmitInlineF32ToF16(ctx, dest[0].Op(), tmp); } void EmitFPFloor32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().roundss(tmp, op1[0], 0x01); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + ctx.Code().roundss(tmp, op1[0].Op(), 0x01); MovFloat(ctx, dest[0], tmp); } void EmitFPFloor64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().roundsd(tmp, op1[0], 0x01); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + ctx.Code().roundsd(tmp, op1[0].Op(), 0x01); MovDouble(ctx, dest[0], tmp); } void EmitFPCeil16(EmitContext& ctx, const Operands& dest, const Operands& op1) { Xmm tmp = ctx.TempXmmReg(); - EmitInlineF16ToF32(ctx, tmp, op1[0]); + EmitInlineF16ToF32(ctx, tmp, op1[0].Op()); ctx.Code().roundss(tmp, tmp, 0x02); - EmitInlineF32ToF16(ctx, dest[0], tmp); + EmitInlineF32ToF16(ctx, dest[0].Op(), tmp); } void EmitFPCeil32(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().roundss(tmp, op1[0], 0x02); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + ctx.Code().roundss(tmp, op1[0].Op(), 0x02); MovFloat(ctx, dest[0], tmp); } void EmitFPCeil64(EmitContext& ctx, const Operands& dest, const Operands& op1) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); - ctx.Code().roundsd(tmp, op1[0], 0x02); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + ctx.Code().roundsd(tmp, op1[0].Op(), 0x02); MovDouble(ctx, dest[0], tmp); } @@ -409,7 +401,7 @@ void EmitFPOrdEqual16(EmitContext& ctx, const Operands& dest, const Operands& lh Label not_nan; EmitFPUnordEqual16(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } @@ -417,7 +409,7 @@ void EmitFPOrdEqual32(EmitContext& ctx, const Operands& dest, const Operands& lh Label not_nan; EmitFPUnordEqual32(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } @@ -425,46 +417,46 @@ void EmitFPOrdEqual64(EmitContext& ctx, const Operands& dest, const Operands& lh Label not_nan; EmitFPUnordEqual64(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } void EmitFPUnordEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { Xmm tmp1 = ctx.TempXmmReg(); Xmm tmp2 = ctx.TempXmmReg(); - EmitInlineF16ToF32(ctx, tmp1, lhs[0]); - EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op()); + EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op()); ctx.Code().ucomiss(tmp1, tmp2); - ctx.Code().sete(dest[0]); + ctx.Code().sete(dest[0].Op()); } void EmitFPUnordEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? 
ctx.TempXmmReg() : lhs[0].getReg().cvt128();
+    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
     MovFloat(ctx, tmp, lhs[0]);
-    ctx.Code().ucomiss(tmp, rhs[0]);
-    ctx.Code().sete(dest[0]);
+    ctx.Code().ucomiss(tmp, rhs[0].Op());
+    ctx.Code().sete(dest[0].Op());
 }

 void EmitFPUnordEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
-    Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128();
+    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
     MovDouble(ctx, tmp, lhs[0]);
-    ctx.Code().ucomisd(tmp, rhs[0]);
-    ctx.Code().sete(dest[0]);
+    ctx.Code().ucomisd(tmp, rhs[0].Op());
+    ctx.Code().sete(dest[0].Op());
 }

 void EmitFPOrdNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
     Label not_nan;
     EmitFPUnordNotEqual16(ctx, dest, lhs, rhs);
     ctx.Code().jnp(not_nan);
-    ctx.Code().mov(dest[0], 0);
+    ctx.Code().mov(dest[0].Op(), 0);
     ctx.Code().L(not_nan);
 }

 void EmitFPOrdNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
     Label not_nan;
-    Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128();
+    EmitFPUnordNotEqual32(ctx, dest, lhs, rhs);
     ctx.Code().jnp(not_nan);
-    ctx.Code().mov(dest[0], 0);
+    ctx.Code().mov(dest[0].Op(), 0);
     ctx.Code().L(not_nan);
 }
@@ -472,38 +464,38 @@ void EmitFPOrdNotEqual64(EmitContext& ctx, const Operands& dest, const Operands&
     Label not_nan;
     EmitFPUnordNotEqual64(ctx, dest, lhs, rhs);
     ctx.Code().jnp(not_nan);
-    ctx.Code().mov(dest[0], 0);
+    ctx.Code().mov(dest[0].Op(), 0);
     ctx.Code().L(not_nan);
 }

 void EmitFPUnordNotEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
     Xmm tmp1 = ctx.TempXmmReg();
     Xmm tmp2 = ctx.TempXmmReg();
-    EmitInlineF16ToF32(ctx, tmp1, lhs[0]);
-    EmitInlineF16ToF32(ctx, tmp2, rhs[0]);
+    EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op());
+    EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op());
     ctx.Code().ucomiss(tmp1, tmp2);
-    ctx.Code().setne(dest[0]);
+    ctx.Code().setne(dest[0].Op());
 }

 void EmitFPUnordNotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
-    Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128();
+    Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm();
     MovFloat(ctx, tmp, lhs[0]);
-    ctx.Code().ucomiss(tmp, rhs[0]);
-    ctx.Code().setne(dest[0]);
+    ctx.Code().ucomiss(tmp, rhs[0].Op());
+    ctx.Code().setne(dest[0].Op());
 }

 void EmitFPUnordNotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) {
-    Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128();
+    Xmm tmp = lhs[0].IsMem() ?
ctx.TempXmmReg() : lhs[0].Xmm(); MovDouble(ctx, tmp, lhs[0]); - ctx.Code().ucomisd(tmp, rhs[0]); - ctx.Code().setne(dest[0]); + ctx.Code().ucomisd(tmp, rhs[0].Op()); + ctx.Code().setne(dest[0].Op()); } void EmitFPOrdLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { Label not_nan; EmitFPUnordLessThan16(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } @@ -511,7 +503,7 @@ void EmitFPOrdLessThan32(EmitContext& ctx, const Operands& dest, const Operands& Label not_nan; EmitFPUnordLessThan32(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } @@ -519,38 +511,38 @@ void EmitFPOrdLessThan64(EmitContext& ctx, const Operands& dest, const Operands& Label not_nan; EmitFPUnordLessThan64(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } void EmitFPUnordLessThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { Xmm tmp1 = ctx.TempXmmReg(); Xmm tmp2 = ctx.TempXmmReg(); - EmitInlineF16ToF32(ctx, tmp1, lhs[0]); - EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op()); + EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op()); ctx.Code().ucomiss(tmp1, tmp2); - ctx.Code().setb(dest[0]); + ctx.Code().setb(dest[0].Op()); } void EmitFPUnordLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm(); MovFloat(ctx, tmp, lhs[0]); - ctx.Code().ucomiss(tmp, rhs[0]); - ctx.Code().setb(dest[0]); + ctx.Code().ucomiss(tmp, rhs[0].Op()); + ctx.Code().setb(dest[0].Op()); } void EmitFPUnordLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].IsMem() ? 
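
The pattern shared by every EmitFPOrd* function above: the unordered variant runs first, and its ucomiss/ucomisd leaves PF=1 exactly when a NaN was involved (setcc writes its byte without touching flags), so a single jnp over a "mov dest, 0" downgrades the unordered result to the ordered one. As straight C++:

#include <cmath>

bool OrdLessThan(float lhs, float rhs) {
    bool result = lhs < rhs;                                   // setb after ucomiss
    const bool unordered = std::isnan(lhs) || std::isnan(rhs); // PF after ucomiss
    if (unordered) {
        result = false; // the "mov dest, 0" that jnp skips for ordered inputs
    }
    return result;
}
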
ctx.TempXmmReg() : lhs[0].Xmm(); MovDouble(ctx, tmp, lhs[0]); - ctx.Code().ucomisd(tmp, rhs[0]); - ctx.Code().setb(dest[0]); + ctx.Code().ucomisd(tmp, rhs[0].Op()); + ctx.Code().setb(dest[0].Op()); } void EmitFPOrdGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { Label not_nan; EmitFPUnordGreaterThan16(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } @@ -558,7 +550,7 @@ void EmitFPOrdGreaterThan32(EmitContext& ctx, const Operands& dest, const Operan Label not_nan; EmitFPUnordGreaterThan32(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } @@ -566,38 +558,38 @@ void EmitFPOrdGreaterThan64(EmitContext& ctx, const Operands& dest, const Operan Label not_nan; EmitFPUnordGreaterThan64(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } void EmitFPUnordGreaterThan16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { Xmm tmp1 = ctx.TempXmmReg(); Xmm tmp2 = ctx.TempXmmReg(); - EmitInlineF16ToF32(ctx, tmp1, lhs[0]); - EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op()); + EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op()); ctx.Code().ucomiss(tmp1, tmp2); - ctx.Code().seta(dest[0]); + ctx.Code().seta(dest[0].Op()); } void EmitFPUnordGreaterThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm(); MovFloat(ctx, tmp, lhs[0]); - ctx.Code().ucomiss(tmp, rhs[0]); - ctx.Code().seta(dest[0]); + ctx.Code().ucomiss(tmp, rhs[0].Op()); + ctx.Code().seta(dest[0].Op()); } void EmitFPUnordGreaterThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].IsMem() ? 
ctx.TempXmmReg() : lhs[0].Xmm(); MovDouble(ctx, tmp, lhs[0]); - ctx.Code().ucomisd(tmp, rhs[0]); - ctx.Code().seta(dest[0]); + ctx.Code().ucomisd(tmp, rhs[0].Op()); + ctx.Code().seta(dest[0].Op()); } void EmitFPOrdLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { Label not_nan; EmitFPUnordLessThanEqual16(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } @@ -605,7 +597,7 @@ void EmitFPOrdLessThanEqual32(EmitContext& ctx, const Operands& dest, const Oper Label not_nan; EmitFPUnordLessThanEqual32(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } @@ -613,38 +605,38 @@ void EmitFPOrdLessThanEqual64(EmitContext& ctx, const Operands& dest, const Oper Label not_nan; EmitFPUnordLessThanEqual64(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } void EmitFPUnordLessThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { Xmm tmp1 = ctx.TempXmmReg(); Xmm tmp2 = ctx.TempXmmReg(); - EmitInlineF16ToF32(ctx, tmp1, lhs[0]); - EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op()); + EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op()); ctx.Code().ucomiss(tmp1, tmp2); - ctx.Code().setbe(dest[0]); + ctx.Code().setbe(dest[0].Op()); } void EmitFPUnordLessThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm(); MovFloat(ctx, tmp, lhs[0]); - ctx.Code().ucomiss(tmp, rhs[0]); - ctx.Code().setbe(dest[0]); + ctx.Code().ucomiss(tmp, rhs[0].Op()); + ctx.Code().setbe(dest[0].Op()); } void EmitFPUnordLessThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].IsMem() ? 
ctx.TempXmmReg() : lhs[0].Xmm(); MovDouble(ctx, tmp, lhs[0]); - ctx.Code().ucomisd(tmp, rhs[0]); - ctx.Code().setbe(dest[0]); + ctx.Code().ucomisd(tmp, rhs[0].Op()); + ctx.Code().setbe(dest[0].Op()); } void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { Label not_nan; EmitFPUnordGreaterThanEqual16(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } @@ -652,7 +644,7 @@ void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const O Label not_nan; EmitFPUnordGreaterThanEqual32(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } @@ -660,52 +652,52 @@ void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const O Label not_nan; EmitFPUnordGreaterThanEqual64(ctx, dest, lhs, rhs); ctx.Code().jnp(not_nan); - ctx.Code().mov(dest[0], 0); + ctx.Code().mov(dest[0].Op(), 0); ctx.Code().L(not_nan); } void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { Xmm tmp1 = ctx.TempXmmReg(); Xmm tmp2 = ctx.TempXmmReg(); - EmitInlineF16ToF32(ctx, tmp1, lhs[0]); - EmitInlineF16ToF32(ctx, tmp2, rhs[0]); + EmitInlineF16ToF32(ctx, tmp1, lhs[0].Op()); + EmitInlineF16ToF32(ctx, tmp2, rhs[0].Op()); ctx.Code().ucomiss(tmp1, tmp2); - ctx.Code().setae(dest[0]); + ctx.Code().setae(dest[0].Op()); } void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm(); MovFloat(ctx, tmp, lhs[0]); - ctx.Code().ucomiss(tmp, rhs[0]); - ctx.Code().setae(dest[0]); + ctx.Code().ucomiss(tmp, rhs[0].Op()); + ctx.Code().setae(dest[0].Op()); } void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Xmm tmp = lhs[0].isMEM() ? ctx.TempXmmReg() : lhs[0].getReg().cvt128(); + Xmm tmp = lhs[0].IsMem() ? ctx.TempXmmReg() : lhs[0].Xmm(); MovDouble(ctx, tmp, lhs[0]); - ctx.Code().ucomisd(tmp, rhs[0]); - ctx.Code().setae(dest[0]); + ctx.Code().ucomisd(tmp, rhs[0].Op()); + ctx.Code().setae(dest[0].Op()); } void EmitFPIsNan16(EmitContext& ctx, const Operands& dest, const Operands& op) { Xmm tmp = ctx.TempXmmReg(); - EmitInlineF16ToF32(ctx, tmp, op[0]); + EmitInlineF16ToF32(ctx, tmp, op[0].Op()); ctx.Code().ucomiss(tmp, tmp); - ctx.Code().setp(dest[0]); + ctx.Code().setp(dest[0].Op()); } void EmitFPIsNan32(EmitContext& ctx, const Operands& dest, const Operands& op) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); MovFloat(ctx, tmp, op[0]); ctx.Code().ucomiss(tmp, tmp); - ctx.Code().setp(dest[0]); + ctx.Code().setp(dest[0].Op()); } void EmitFPIsNan64(EmitContext& ctx, const Operands& dest, const Operands& op) { - Xmm tmp = dest[0].isMEM() ? ctx.TempXmmReg() : dest[0].getReg().cvt128(); + Xmm tmp = dest[0].IsMem() ? 
ctx.TempXmmReg() : dest[0].Xmm(); MovDouble(ctx, tmp, op[0]); ctx.Code().ucomisd(tmp, tmp); - ctx.Code().setp(dest[0]); + ctx.Code().setp(dest[0].Op()); } void EmitFPIsInf32(EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h index 0e88727b2..c85da6890 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h @@ -6,6 +6,7 @@ #include #include #include "common/types.h" +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" namespace Shader::IR { enum class Attribute : u64; @@ -16,10 +17,6 @@ class Value; } // namespace Shader::IR namespace Shader::Backend::X64 { - -using Operands = boost::container::static_vector; - -class EmitContext; // Microinstruction emitters void EmitPhi(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp index 2cc3b7c7e..6251a174a 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp @@ -12,8 +12,12 @@ using namespace Xbyak::util; namespace { -static bool EmitSaveRegTemp(EmitContext ctx, const Reg& save, const Operand& dest) { - if (dest.getIdx() == save.getIdx()) { +static bool IsReg(const OperandHolder& op, const Reg& reg) { + return op.IsReg() && op.Reg().getIdx() == reg.getIdx(); +} + +static bool EmitSaveRegTemp(EmitContext ctx, const Reg& save, const OperandHolder& dest) { + if (IsReg(dest, save)) { // Destination is reg, no need to save return false; } @@ -28,47 +32,47 @@ static void EmitRestoreRegTemp(EmitContext ctx, const Reg& save) { } // namespace void EmitIAdd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - if (dest[0].isREG() && op1[0].isREG() && op2[0].isREG()) { - ctx.Code().lea(dest[0].getReg(), ptr[op1[0].getReg() + op2[0].getReg()]); + if (dest[0].IsReg() && op1[0].IsReg() && op2[0].IsReg()) { + ctx.Code().lea(dest[0].Reg(), ptr[op1[0].Reg() + op2[0].Reg()]); } else { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0]; MovGP(ctx, tmp, op1[0]); - ctx.Code().add(tmp, op2[0]); + ctx.Code().add(tmp.Op(), op2[0].Op()); MovGP(ctx, dest[0], tmp); } } void EmitIAdd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - if (dest[0].isREG() && op1[0].isREG() && op2[0].isREG()) { - ctx.Code().lea(dest[0].getReg(), ptr[op1[0].getReg() + op2[0].getReg()]); + if (dest[0].IsReg() && op1[0].IsReg() && op2[0].IsReg()) { + ctx.Code().lea(dest[0].Reg(), ptr[op1[0].Reg() + op2[0].Reg()]); } else { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg() : dest[0]; MovGP(ctx, tmp, op1[0]); - ctx.Code().add(tmp, op2[0]); + ctx.Code().add(tmp.Op(), op2[0].Op()); MovGP(ctx, dest[0], tmp); } } void EmitIAddCary32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); - Operand carry = dest[1]; - carry.setBit(1); + OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? 
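// Carry-out variant: add leaves the carry in CF, and setc below materializes
// it into the second destination component as a 0/1 byte.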
ctx.TempGPReg().cvt32() : dest[0]; + OperandHolder carry = dest[1]; + carry.Op().setBit(1); MovGP(ctx, tmp, op1[0]); - ctx.Code().add(tmp, op2[0]); - ctx.Code().setc(carry); + ctx.Code().add(tmp.Op(), op2[0].Op()); + ctx.Code().setc(carry.Op()); } void EmitISub32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0]; MovGP(ctx, tmp, op1[0]); - ctx.Code().sub(tmp, op2[0]); + ctx.Code().sub(tmp.Op(), op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitISub64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg() : dest[0]; MovGP(ctx, tmp, op1[0]); - ctx.Code().sub(tmp, op2[0]); + ctx.Code().sub(tmp.Op(), op2[0].Op()); MovGP(ctx, dest[0], tmp); } @@ -81,29 +85,29 @@ void EmitUMulExt(EmitContext& ctx) { } void EmitIMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg(); MovGP(ctx, tmp, op1[0]); - ctx.Code().imul(tmp, op2[0]); + ctx.Code().imul(tmp, op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitIMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg(); MovGP(ctx, tmp, op1[0]); - ctx.Code().imul(tmp, op2[0]); + ctx.Code().imul(tmp, op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitSDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]); bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]); - Reg tmp = op2[0].getReg().cvt32(); - while (tmp.getIdx() == rax.getIdx()) { + OperandHolder tmp = op2[0]; + while (IsReg(tmp, rax)) { tmp = ctx.TempGPReg().cvt32(); } MovGP(ctx, tmp, op2[0]); MovGP(ctx, eax, op1[0]); - ctx.Code().idiv(tmp); + ctx.Code().idiv(tmp.Op()); MovGP(ctx, dest[0], eax); if (rdx_saved) { EmitRestoreRegTemp(ctx, rdx); @@ -116,13 +120,13 @@ void EmitSDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, con void EmitUDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]); bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]); - Reg tmp = op2[0].getReg().cvt32(); - while (tmp.getIdx() == rax.getIdx()) { + OperandHolder tmp = op2[0]; + while (IsReg(tmp, rax)) { tmp = ctx.TempGPReg().cvt32(); } MovGP(ctx, tmp, op2[0]); MovGP(ctx, eax, op1[0]); - ctx.Code().div(tmp); + ctx.Code().div(tmp.Op()); MovGP(ctx, dest[0], eax); if (rdx_saved) { EmitRestoreRegTemp(ctx, rdx); @@ -135,13 +139,13 @@ void EmitUDiv32(EmitContext& ctx, const Operands& dest, const Operands& op1, con void EmitSMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]); bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]); - Reg tmp = op2[0].getReg().cvt32(); - while (tmp.getIdx() == rax.getIdx()) { + OperandHolder tmp = op2[0]; + while (IsReg(tmp, rax)) { tmp = ctx.TempGPReg().cvt32(); } MovGP(ctx, tmp, op2[0]); MovGP(ctx, eax, 
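// idiv/div operate on the edx:eax pair: quotient -> eax, remainder -> edx.
// The *Div emitters read eax and the *Mod emitters read edx afterwards,
// which is why rax and rdx are saved before the division and restored after.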
op1[0]); - ctx.Code().idiv(tmp); + ctx.Code().idiv(tmp.Op()); MovGP(ctx, dest[0], edx); if (rdx_saved) { EmitRestoreRegTemp(ctx, rdx); @@ -154,13 +158,13 @@ void EmitSMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, con void EmitUMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { bool rax_saved = EmitSaveRegTemp(ctx, rax, dest[0]); bool rdx_saved = EmitSaveRegTemp(ctx, rdx, dest[0]); - Reg tmp = op2[0].getReg().cvt32(); - while (tmp.getIdx() == rax.getIdx()) { + OperandHolder tmp = op2[0]; + while (IsReg(tmp, rax)) { tmp = ctx.TempGPReg().cvt32(); } MovGP(ctx, tmp, op2[0]); MovGP(ctx, eax, op1[0]); - ctx.Code().div(tmp); + ctx.Code().div(tmp.Op()); MovGP(ctx, dest[0], edx); if (rdx_saved) { EmitRestoreRegTemp(ctx, rdx); @@ -171,36 +175,30 @@ void EmitUMod32(EmitContext& ctx, const Operands& dest, const Operands& op1, con } void EmitINeg32(EmitContext& ctx, const Operands& dest, const Operands& op) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); - MovGP(ctx, tmp, op[0]); - ctx.Code().neg(tmp); - MovGP(ctx, dest[0], tmp); + MovGP(ctx, dest[0], op[0]); + ctx.Code().neg(dest[0].Op()); } void EmitINeg64(EmitContext& ctx, const Operands& dest, const Operands& op) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); - MovGP(ctx, tmp, op[0]); - ctx.Code().neg(tmp); - MovGP(ctx, dest[0], tmp); + MovGP(ctx, dest[0], op[0]); + ctx.Code().neg(dest[0].Op()); } void EmitIAbs32(EmitContext& ctx, const Operands& dest, const Operands& op) { Label done; - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); - MovGP(ctx, tmp, op[0]); - ctx.Code().cmp(tmp, 0); + MovGP(ctx, dest[0], op[0]); + ctx.Code().cmp(dest[0].Op(), 0); ctx.Code().jns(done); - ctx.Code().neg(tmp); + ctx.Code().neg(dest[0].Op()); ctx.Code().L(done); - MovGP(ctx, dest[0], tmp); } void EmitShiftLeftLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) { bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]); - Reg tmp = dest[0].getIdx() == rcx.getIdx() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg().cvt32() : dest[0]; MovGP(ctx, tmp, base[0]); MovGP(ctx, cl, shift[0]); - ctx.Code().shl(tmp, cl); + ctx.Code().shl(tmp.Op(), cl); MovGP(ctx, dest[0], tmp); if (rcx_saved) { EmitRestoreRegTemp(ctx, rcx); @@ -209,10 +207,10 @@ void EmitShiftLeftLogical32(EmitContext& ctx, const Operands& dest, const Operan void EmitShiftLeftLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) { bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]); - Reg tmp = dest[0].getIdx() == rcx.getIdx() ? ctx.TempGPReg(false) : dest[0].getReg(); + OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg() : dest[0]; MovGP(ctx, tmp, base[0]); MovGP(ctx, cl, shift[0]); - ctx.Code().shl(tmp, cl); + ctx.Code().shl(tmp.Op(), cl); MovGP(ctx, dest[0], tmp); if (rcx_saved) { EmitRestoreRegTemp(ctx, rcx); @@ -221,10 +219,10 @@ void EmitShiftLeftLogical64(EmitContext& ctx, const Operands& dest, const Operan void EmitShiftRightLogical32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) { bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]); - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + OperandHolder tmp = IsReg(dest[0], rcx) ? 
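// x86 variable shifts only take their count in cl, so rcx is preserved
// around each shift and a temp stands in when the destination is rcx itself.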
ctx.TempGPReg().cvt32() : dest[0]; MovGP(ctx, tmp, base[0]); MovGP(ctx, cl, shift[0]); - ctx.Code().shr(tmp, cl); + ctx.Code().shr(tmp.Op(), cl); MovGP(ctx, dest[0], tmp); if (rcx_saved) { EmitRestoreRegTemp(ctx, rcx); @@ -233,10 +231,10 @@ void EmitShiftRightLogical32(EmitContext& ctx, const Operands& dest, const Opera void EmitShiftRightLogical64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) { bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]); - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg() : dest[0]; MovGP(ctx, tmp, base[0]); MovGP(ctx, cl, shift[0]); - ctx.Code().shr(tmp, cl); + ctx.Code().shr(tmp.Op(), cl); MovGP(ctx, dest[0], tmp); if (rcx_saved) { EmitRestoreRegTemp(ctx, rcx); @@ -245,10 +243,10 @@ void EmitShiftRightLogical64(EmitContext& ctx, const Operands& dest, const Opera void EmitShiftRightArithmetic32(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) { bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]); - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg().cvt32() : dest[0]; MovGP(ctx, tmp, base[0]); MovGP(ctx, cl, shift[0]); - ctx.Code().sar(tmp, cl); + ctx.Code().sar(tmp.Op(), cl); MovGP(ctx, dest[0], tmp); if (rcx_saved) { EmitRestoreRegTemp(ctx, rcx); @@ -257,10 +255,10 @@ void EmitShiftRightArithmetic32(EmitContext& ctx, const Operands& dest, const Op void EmitShiftRightArithmetic64(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& shift) { bool rcx_saved = EmitSaveRegTemp(ctx, rcx, dest[0]); - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + OperandHolder tmp = IsReg(dest[0], rcx) ? ctx.TempGPReg() : dest[0]; MovGP(ctx, tmp, base[0]); MovGP(ctx, cl, shift[0]); - ctx.Code().sar(tmp, cl); + ctx.Code().sar(tmp.Op(), cl); MovGP(ctx, dest[0], tmp); if (rcx_saved) { EmitRestoreRegTemp(ctx, rcx); @@ -268,37 +266,37 @@ void EmitShiftRightArithmetic64(EmitContext& ctx, const Operands& dest, const Op } void EmitBitwiseAnd32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0]; MovGP(ctx, tmp, op1[0]); - ctx.Code().and_(tmp, op2[0]); + ctx.Code().and_(tmp.Op(), op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitBitwiseAnd64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false) : dest[0].getReg(); + OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg() : dest[0]; MovGP(ctx, tmp, op1[0]); - ctx.Code().and_(tmp, op2[0]); + ctx.Code().and_(tmp.Op(), op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitBitwiseOr32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0]; MovGP(ctx, tmp, op1[0]); - ctx.Code().or_(tmp, op2[0]); + ctx.Code().or_(tmp.Op(), op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitBitwiseOr64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? 
ctx.TempGPReg(false) : dest[0].getReg(); + OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg() : dest[0]; MovGP(ctx, tmp, op1[0]); - ctx.Code().or_(tmp, op2[0]); + ctx.Code().or_(tmp.Op(), op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitBitwiseXor32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0]; MovGP(ctx, tmp, op1[0]); - ctx.Code().xor_(tmp, op2[0]); + ctx.Code().xor_(tmp.Op(), op2[0].Op()); MovGP(ctx, dest[0], tmp); } @@ -327,10 +325,8 @@ void EmitBitCount64(EmitContext& ctx) { } void EmitBitwiseNot32(EmitContext& ctx, const Operands& dest, const Operands& op) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); - MovGP(ctx, tmp, op[0]); - ctx.Code().not_(tmp); - MovGP(ctx, dest[0], tmp); + MovGP(ctx, dest[0], op[0]); + ctx.Code().not_(dest[0].Op()); } void EmitFindSMsb32(EmitContext& ctx) { @@ -350,153 +346,153 @@ void EmitFindILsb64(EmitContext& ctx) { } void EmitSMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg(); MovGP(ctx, tmp, op1[0]); - ctx.Code().cmp(tmp, op2[0]); - ctx.Code().cmovg(tmp, op2[0]); + ctx.Code().cmp(tmp, op2[0].Op()); + ctx.Code().cmovg(tmp, op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitUMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg(); MovGP(ctx, tmp, op1[0]); - ctx.Code().cmp(tmp, op2[0]); - ctx.Code().cmova(tmp, op2[0]); + ctx.Code().cmp(tmp, op2[0].Op()); + ctx.Code().cmova(tmp, op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitSMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg(); MovGP(ctx, tmp, op1[0]); - ctx.Code().cmp(tmp, op2[0]); - ctx.Code().cmovl(tmp, op2[0]); + ctx.Code().cmp(tmp, op2[0].Op()); + ctx.Code().cmovl(tmp, op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitUMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg(); MovGP(ctx, tmp, op1[0]); - ctx.Code().cmp(tmp, op2[0]); - ctx.Code().cmovb(tmp, op2[0]); + ctx.Code().cmp(tmp, op2[0].Op()); + ctx.Code().cmovb(tmp, op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitSClamp32(EmitContext& ctx, const Operands& dest, const Operands& value, const Operands& min, const Operands& max) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].IsMem() ? 
ctx.TempGPReg().cvt32() : dest[0].Reg(); MovGP(ctx, tmp, value[0]); - ctx.Code().cmp(tmp, min[0]); - ctx.Code().cmovl(tmp, min[0]); - ctx.Code().cmp(tmp, max[0]); - ctx.Code().cmovg(tmp, max[0]); + ctx.Code().cmp(tmp, min[0].Op()); + ctx.Code().cmovl(tmp, min[0].Op()); + ctx.Code().cmp(tmp, max[0].Op()); + ctx.Code().cmovg(tmp, max[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitUClamp32(EmitContext& ctx, const Operands& dest, const Operands& value, const Operands& min, const Operands& max) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg(false).cvt32() : dest[0].getReg().cvt32(); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg(); MovGP(ctx, tmp, value[0]); - ctx.Code().cmp(tmp, min[0]); - ctx.Code().cmovb(tmp, min[0]); - ctx.Code().cmp(tmp, max[0]); - ctx.Code().cmova(tmp, max[0]); + ctx.Code().cmp(tmp, min[0].Op()); + ctx.Code().cmovb(tmp, min[0].Op()); + ctx.Code().cmp(tmp, max[0].Op()); + ctx.Code().cmova(tmp, max[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitSLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().setl(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().setl(dest[0].Op()); } void EmitSLessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false) : lhs[0].getReg(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().setl(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().setl(dest[0].Op()); } void EmitULessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().setb(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().setb(dest[0].Op()); } void EmitULessThan64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false) : lhs[0].getReg(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().setb(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().setb(dest[0].Op()); } void EmitIEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().sete(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().sete(dest[0].Op()); } void EmitIEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false) : lhs[0].getReg(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? 
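// cmp cannot encode two memory operands, so lhs is staged through a temp
// register only when both comparison sides were spilled to memory.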
ctx.TempGPReg() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().sete(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().sete(dest[0].Op()); } void EmitSLessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().setle(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().setle(dest[0].Op()); } void EmitULessThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().setbe(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().setbe(dest[0].Op()); } void EmitSGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().setg(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().setg(dest[0].Op()); } void EmitUGreaterThan(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().seta(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().seta(dest[0].Op()); } void EmitINotEqual32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().setne(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().setne(dest[0].Op()); } void EmitINotEqual64(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false) : lhs[0].getReg(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().setne(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().setne(dest[0].Op()); } void EmitSGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? 
ctx.TempGPReg().cvt32() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().setge(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().setge(dest[0].Op()); } void EmitUGreaterThanEqual(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs) { - Reg tmp = lhs[0].isMEM() && rhs[0].isMEM() ? ctx.TempGPReg(false).cvt32() : lhs[0].getReg().cvt32(); + OperandHolder tmp = lhs[0].IsMem() && rhs[0].IsMem() ? ctx.TempGPReg().cvt32() : lhs[0]; MovGP(ctx, tmp, lhs[0]); - ctx.Code().cmp(tmp, rhs[0]); - ctx.Code().setae(dest[0]); + ctx.Code().cmp(tmp.Op(), rhs[0].Op()); + ctx.Code().setae(dest[0].Op()); } } // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp index 30ec2eeeb..d1d7cfb74 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp @@ -10,31 +10,29 @@ using namespace Xbyak; using namespace Xbyak::util; void EmitLogicalOr(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt8() : dest[0].getReg().cvt8(); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0].Reg().cvt8(); MovGP(ctx, tmp, op1[0]); - ctx.Code().or_(tmp, op2[0]); + ctx.Code().or_(tmp, op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitLogicalAnd(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt8() : dest[0].getReg().cvt8(); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0].Reg().cvt8(); MovGP(ctx, tmp, op1[0]); - ctx.Code().and_(tmp, op2[0]); + ctx.Code().and_(tmp, op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitLogicalXor(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt8() : dest[0].getReg().cvt8(); + Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0].Reg().cvt8(); MovGP(ctx, tmp, op1[0]); - ctx.Code().xor_(tmp, op2[0]); + ctx.Code().xor_(tmp, op2[0].Op()); MovGP(ctx, dest[0], tmp); } void EmitLogicalNot(EmitContext& ctx, const Operands& dest, const Operands& op) { - Reg tmp = dest[0].isMEM() ? ctx.TempGPReg().cvt8() : dest[0].getReg().cvt8(); - MovGP(ctx, tmp, op[0]); - ctx.Code().not_(tmp); - MovGP(ctx, dest[0], tmp); + MovGP(ctx, dest[0], op[0]); + ctx.Code().not_(dest[0].Op()); } } // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp index 56ecaee03..bc86ffcad 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_select.cpp @@ -11,7 +11,7 @@ using namespace Xbyak::util; void EmitSelectU1(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { Label false_label, end_label; - Reg tmp = cond[0].isMEM() ? ctx.TempGPReg().cvt8() : cond[0].getReg().cvt8(); + Reg tmp = cond[0].IsMem() ? 
ctx.TempGPReg().cvt8() : cond[0].Reg().cvt8(); MovGP(ctx, tmp, cond[0]); ctx.Code().test(tmp, tmp); ctx.Code().jz(false_label); @@ -44,7 +44,7 @@ void EmitSelectF16(EmitContext& ctx, const Operands& dest, const Operands& cond, void EmitSelectF32(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { Label false_label, end_label; - Reg tmp = cond[0].isMEM() ? ctx.TempGPReg().cvt8() : cond[0].getReg().cvt8(); + Reg tmp = cond[0].IsMem() ? ctx.TempGPReg().cvt8() : cond[0].Reg().cvt8(); MovGP(ctx, tmp, cond[0]); ctx.Code().test(tmp, tmp); ctx.Code().jz(false_label); @@ -57,7 +57,7 @@ void EmitSelectF32(EmitContext& ctx, const Operands& dest, const Operands& cond, void EmitSelectF64(EmitContext& ctx, const Operands& dest, const Operands& cond, const Operands& true_value, const Operands& false_value) { Label false_label, end_label; - Reg tmp = cond[0].isMEM() ? ctx.TempGPReg().cvt8() : cond[0].getReg().cvt8(); + Reg tmp = cond[0].IsMem() ? ctx.TempGPReg().cvt8() : cond[0].Reg().cvt8(); MovGP(ctx, tmp, cond[0]); ctx.Code().test(tmp, tmp); ctx.Code().jz(false_label); diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp index acae51f66..16a406a81 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_special.cpp @@ -30,7 +30,7 @@ void EmitDiscard(EmitContext& ctx) { } void EmitDiscardCond(EmitContext& ctx, const Operands& condition) { - Reg tmp = condition[0].isMEM() ? ctx.TempGPReg().cvt8() : condition[0].getReg().cvt8(); + Reg tmp = condition[0].IsMem() ? ctx.TempGPReg().cvt8() : condition[0].Reg().cvt8(); MovGP(ctx, tmp, condition[0]); ctx.Code().test(tmp, tmp); ctx.Code().jnz(ctx.EndLabel()); diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp index 0b03b2e75..ee104fe89 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp @@ -76,19 +76,19 @@ Operands EmitContext::Def(const IR::Value& value) { switch (value.Type()) { case IR::Type::U1: operands.push_back(TempGPReg().cvt8()); - code.mov(operands.back(), value.U1()); + code.mov(operands.back().Reg(), value.U1()); break; case IR::Type::U8: operands.push_back(TempGPReg().cvt8()); - code.mov(operands.back(), value.U8()); + code.mov(operands.back().Reg(), value.U8()); break; case IR::Type::U16: operands.push_back(TempGPReg().cvt16()); - code.mov(operands.back(), value.U16()); + code.mov(operands.back().Reg(), value.U16()); break; case IR::Type::U32: operands.push_back(TempGPReg().cvt32()); - code.mov(operands.back(), value.U32()); + code.mov(operands.back().Reg(), value.U32()); break; case IR::Type::F32: { code.mov(tmp.cvt32(), std::bit_cast(value.F32())); @@ -99,7 +99,7 @@ Operands EmitContext::Def(const IR::Value& value) { } case IR::Type::U64: operands.push_back(TempGPReg()); - code.mov(operands.back(), value.U64()); + code.mov(operands.back().Reg(), value.U64()); break; case IR::Type::F64: { code.mov(tmp, std::bit_cast(value.F64())); @@ -110,19 +110,19 @@ Operands EmitContext::Def(const IR::Value& value) { } case IR::Type::ScalarReg: operands.push_back(TempGPReg().cvt32()); - code.mov(operands.back(), std::bit_cast(value.ScalarReg())); + code.mov(operands.back().Reg(), std::bit_cast(value.ScalarReg())); break; case IR::Type::VectorReg: 
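// ScalarReg/VectorReg values are register indices rather than data, so they
// are staged in 32-bit GP temps, mirroring the ScalarReg case above.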
operands.push_back(TempGPReg().cvt32());
-        code.mov(operands.back(), std::bit_cast(value.VectorReg()));
+        code.mov(operands.back().Reg(), std::bit_cast(value.VectorReg()));
         break;
     case IR::Type::Attribute:
         operands.push_back(TempGPReg());
-        code.mov(operands.back(), std::bit_cast(value.Attribute()));
+        code.mov(operands.back().Reg(), std::bit_cast(value.Attribute()));
         break;
     case IR::Type::Patch:
         operands.push_back(TempGPReg());
-        code.mov(operands.back(), std::bit_cast(value.Patch()));
+        code.mov(operands.back().Reg(), std::bit_cast(value.Patch()));
         break;
     default:
         UNREACHABLE_MSG("Unsupported value type: {}", IR::NameOf(value.Type()));
@@ -195,7 +195,7 @@ void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& inte
         ctx.active_spill_intervals.push_back(interval);
     } else {
         Operands& operands = inst_to_operands[spill_candidate->inst];
-        Reg reg = operands[spill_candidate->component].getReg();
+        Reg reg = operands[spill_candidate->component].Reg();
         inst_to_operands[interval.inst][interval.component] =
             reg.isXMM() ? reg : ResizeRegToType(reg, interval.inst);
         operands[spill_candidate->component] = get_operand(spill_candidate->inst);
@@ -288,7 +288,7 @@ void EmitContext::AllocateRegisters() {
     // Free old interval resources
     for (auto it = ctx.active_gp_intervals.begin(); it != ctx.active_gp_intervals.end();) {
         if (it->end < interval.start) {
-            Reg64 reg = inst_to_operands[it->inst][it->component].getReg().cvt64();
+            Reg64 reg = inst_to_operands[it->inst][it->component].Reg().cvt64();
             ctx.free_gp_regs.push_back(reg);
             it = ctx.active_gp_intervals.erase(it);
         } else {
@@ -297,7 +297,7 @@ }
     for (auto it = ctx.active_xmm_intervals.begin(); it != ctx.active_xmm_intervals.end();) {
         if (it->end < interval.start) {
-            Xmm reg = inst_to_operands[it->inst][it->component].getReg().cvt128();
+            Xmm reg = inst_to_operands[it->inst][it->component].Xmm();
             ctx.free_xmm_regs.push_back(reg);
             it = ctx.active_xmm_intervals.erase(it);
         } else {
@@ -307,7 +307,7 @@
     for (auto it = ctx.active_spill_intervals.begin(); it != ctx.active_spill_intervals.end();) {
         if (it->end < interval.start) {
-            const Address& addr = inst_to_operands[it->inst][it->component].getAddress();
+            const Address& addr = inst_to_operands[it->inst][it->component].Mem();
             ctx.free_stack_slots.push_back(addr.getDisp());
             it = ctx.active_spill_intervals.erase(it);
         } else {
diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h
index 994fc6f6a..ce9921233 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h
+++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h
@@ -11,7 +11,78 @@
 namespace Shader::Backend::X64 {
 
-using Operands = boost::container::static_vector;
+class OperandHolder {
+public:
+    OperandHolder() : op() {}
+    OperandHolder(const OperandHolder&) = default;
+    OperandHolder(OperandHolder&&) = default;
+    OperandHolder& operator=(const OperandHolder&) = default;
+    OperandHolder& operator=(OperandHolder&&) = default;
+
+    OperandHolder(const Xbyak::Reg& reg_) : reg(reg_) {}
+    OperandHolder(const Xbyak::Xmm& xmm_) : xmm(xmm_) {}
+    OperandHolder(const Xbyak::Address& mem_) : mem(mem_) {}
+    OperandHolder(const Xbyak::Operand& op_) : op(op_) {}
+
+    [[nodiscard]] inline Xbyak::Operand& Op() {
+        return op;
+    }
+
+    [[nodiscard]] inline const Xbyak::Operand& Op() const {
+        return op;
+    }
+
+    [[nodiscard]] inline Xbyak::Reg& Reg() {
+        ASSERT(IsReg());
+        return reg;
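// These accessors assume every Xbyak operand type shares Xbyak::Operand as
// its base, so kind checks read the union through op; the ASSERTs guard the
// kind-specific views.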
+ } + + [[nodiscard]] inline const Xbyak::Reg& Reg() const { + ASSERT(IsReg()); + return reg; + } + + [[nodiscard]] inline Xbyak::Xmm& Xmm() { + ASSERT(IsXmm()); + return xmm; + } + + [[nodiscard]] inline const Xbyak::Xmm& Xmm() const { + ASSERT(IsXmm()); + return xmm; + } + + [[nodiscard]] inline Xbyak::Address& Mem() { + ASSERT(IsMem()); + return mem; + } + + [[nodiscard]] inline const Xbyak::Address& Mem() const { + ASSERT(IsMem()); + return mem; + } + + [[nodiscard]] inline bool IsReg() const { + return op.isREG(); + } + + [[nodiscard]] inline bool IsXmm() const { + return op.isXMM(); + } + + [[nodiscard]] inline bool IsMem() const { + return op.isMEM(); + } +private: + union { + Xbyak::Operand op; + Xbyak::Reg reg; + Xbyak::Xmm xmm; + Xbyak::Address mem; + }; +}; + +using Operands = boost::container::static_vector; class EmitContext { public: diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp index b93583696..aedd12547 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp @@ -107,63 +107,73 @@ Reg ResizeRegToType(const Reg& reg, const IR::Value& value) { return reg; } -void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) { +void MovFloat(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src) { CodeGenerator& c = ctx.Code(); - if (src == dst) { + if (src.Op() == dst.Op()) { return; } - if (src.isMEM() && dst.isMEM()) { + if (src.IsMem() && dst.IsMem()) { Reg tmp = ctx.TempGPReg(false).cvt32(); - c.mov(tmp, src); - c.mov(dst, tmp); - } else if (src.isMEM() && dst.isXMM()) { - c.movss(dst.getReg().cvt128(), src.getAddress()); - } else if (src.isXMM() && dst.isMEM()) { - c.movss(dst.getAddress(), src.getReg().cvt128()); - } else if (src.isXMM() && dst.isXMM()) { - c.movaps(dst.getReg().cvt128(), src.getReg().cvt128()); + c.mov(tmp, src.Mem()); + c.mov(dst.Mem(), tmp); + } else if (src.IsMem() && dst.IsXmm()) { + c.movss(dst.Xmm(), src.Mem()); + } else if (src.IsXmm() && dst.IsMem()) { + c.movss(dst.Mem(), src.Xmm()); + } else if (src.IsXmm() && dst.IsXmm()) { + c.movaps(dst.Xmm(), src.Xmm()); } else { - UNREACHABLE_MSG("Unsupported mov float {} {}", src.toString(), dst.toString()); + UNREACHABLE_MSG("Unsupported mov float {} {}", src.Op().toString(), dst.Op().toString()); } } -void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) { +void MovDouble(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src) { CodeGenerator& c = ctx.Code(); - if (src == dst) { + if (src.Op() == dst.Op()) { return; } - if (src.isMEM() && dst.isMEM()) { + if (src.IsMem() && dst.IsMem()) { const Reg64& tmp = ctx.TempGPReg(false); - c.mov(tmp, src); - c.mov(dst, tmp); - } else if (src.isMEM() && dst.isXMM()) { - c.movsd(dst.getReg().cvt128(), src.getAddress()); - } else if (src.isXMM() && dst.isMEM()) { - c.movsd(dst.getAddress(), src.getReg().cvt128()); - } else if (src.isXMM() && dst.isXMM()) { - c.movapd(dst.getReg().cvt128(), src.getReg().cvt128()); + c.mov(tmp, src.Mem()); + c.mov(dst.Mem(), tmp); + } else if (src.IsMem() && dst.IsXmm()) { + c.movsd(dst.Xmm(), src.Mem()); + } else if (src.IsXmm() && dst.IsMem()) { + c.movsd(dst.Mem(), src.Xmm()); + } else if (src.IsXmm() && dst.IsXmm()) { + c.movapd(dst.Xmm(), src.Xmm()); } else { - UNREACHABLE_MSG("Unsupported mov double {} {}", src.toString(), dst.toString()); + UNREACHABLE_MSG("Unsupported mov double {} {}", src.Op().toString(), 
dst.Op().toString()); } } -void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src) { +void MovGP(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src) { CodeGenerator& c = ctx.Code(); - if (src == dst) { + if (src.Op() == dst.Op()) { return; } - Reg tmp = dst.isMEM() ? ctx.TempGPReg(false).changeBit(dst.getBit()) : dst.getReg(); - if (src.getBit() < dst.getBit() && !src.isBit(32)) { - c.movzx(tmp, src); - } else if (src.getBit() > dst.getBit()) { - Operand src_tmp = src; - src_tmp.setBit(dst.getBit()); - c.mov(tmp, src_tmp); + const bool is_mem2mem = src.IsMem() && dst.IsMem(); + const u32 src_bit = src.Op().getBit(); + const u32 dst_bit = dst.Op().getBit(); + OperandHolder tmp = is_mem2mem ? ctx.TempGPReg(false).changeBit(dst_bit) : dst; + if (src_bit < dst_bit) { + if (!dst.IsMem() && !src.Op().isBit(32)) { + c.movzx(tmp.Reg(), src.Op()); + } else { + if (dst.IsMem()) { + c.mov(tmp.Op(), 0); + } + c.mov(tmp.Op(), src.Op()); + } + } else if (src_bit > dst_bit) { + OperandHolder src_tmp = src; + src_tmp.Op().setBit(dst_bit); + c.mov(tmp.Op(), src_tmp.Op()); } else { - c.mov(tmp, src); + c.mov(tmp.Op(), src.Op()); } - if (dst.isMEM()) { - c.mov(dst, tmp); + if (is_mem2mem) { + c.mov(dst.Op(), tmp.Op()); } } @@ -194,56 +204,56 @@ void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src) { } } else { CodeGenerator& c = ctx.Code(); - const bool is_mem = dst[0].isMEM(); + const bool is_mem = dst[0].IsMem(); Reg64& tmp = ctx.TempGPReg(false); switch (src.Type()) { case IR::Type::U1: - c.mov(is_mem ? tmp.cvt8() : dst[0], src.U1()); + c.mov(is_mem ? tmp.cvt8() : dst[0].Reg(), src.U1()); break; case IR::Type::U8: - c.mov(is_mem ? tmp.cvt8() : dst[0], src.U8()); + c.mov(is_mem ? tmp.cvt8() : dst[0].Reg(), src.U8()); break; case IR::Type::U16: - c.mov(is_mem ? tmp.cvt16() : dst[0], src.U16()); + c.mov(is_mem ? tmp.cvt16() : dst[0].Reg(), src.U16()); break; case IR::Type::U32: - c.mov(is_mem ? tmp.cvt32() : dst[0], src.U32()); + c.mov(is_mem ? tmp.cvt32() : dst[0].Reg(), src.U32()); break; case IR::Type::F32: - c.mov(tmp.cvt32(), std::bit_cast(src.F32())); + c.mov(tmp.cvt32(), static_cast(src.F32())); if (!is_mem) { - c.movd(dst[0].getReg().cvt128(), tmp.cvt32()); + c.movd(dst[0].Xmm(), tmp.cvt32()); return; } break; case IR::Type::U64: - c.mov(is_mem ? tmp : dst[0], src.U64()); + c.mov(is_mem ? tmp : dst[0].Reg(), src.U64()); break; case IR::Type::F64: - c.mov(tmp, std::bit_cast(src.F64())); + c.mov(tmp, static_cast(src.F64())); if (!is_mem) { - c.movq(dst[0].getReg().cvt128(), tmp); + c.movq(dst[0].Xmm(), tmp); return; } break; case IR::Type::ScalarReg: - c.mov(is_mem ? tmp.cvt32() : dst[0], std::bit_cast(src.ScalarReg())); + c.mov(is_mem ? tmp.cvt32() : dst[0].Reg(), static_cast(src.ScalarReg())); break; case IR::Type::VectorReg: - c.mov(is_mem ? tmp.cvt32() : dst[0], std::bit_cast(src.VectorReg())); + c.mov(is_mem ? tmp.cvt32() : dst[0].Reg(), static_cast(src.VectorReg())); break; case IR::Type::Attribute: - c.mov(is_mem ? tmp : dst[0], std::bit_cast(src.Attribute())); + c.mov(is_mem ? tmp : dst[0].Reg(), std::bit_cast(src.Attribute())); break; case IR::Type::Patch: - c.mov(is_mem ? tmp : dst[0], std::bit_cast(src.Patch())); + c.mov(is_mem ? 
tmp : dst[0].Reg(), std::bit_cast(src.Patch())); break; default: UNREACHABLE_MSG("Unsupported type {}", IR::NameOf(src.Type())); break; } if (is_mem) { - c.mov(dst[0], tmp); + c.mov(dst[0].Mem(), tmp); } } } diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.h b/src/shader_recompiler/backend/asm_x64/x64_utils.h index 1c513234a..4782c0150 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_utils.h +++ b/src/shader_recompiler/backend/asm_x64/x64_utils.h @@ -14,9 +14,9 @@ bool IsFloatingType(const IR::Value& value); size_t GetRegBytesOfType(const IR::Value& value); u8 GetNumComponentsOfType(const IR::Value& value); Xbyak::Reg ResizeRegToType(const Xbyak::Reg& reg, const IR::Value& value); -void MovFloat(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src); -void MovDouble(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src); -void MovGP(EmitContext& ctx, const Xbyak::Operand& dst, const Xbyak::Operand& src); +void MovFloat(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src); +void MovDouble(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src); +void MovGP(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src); void MovValue(EmitContext& ctx, const Operands& dst, const IR::Value& src); void EmitInlineF16ToF32(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src); void EmitInlineF32ToF16(EmitContext& ctx, const Xbyak::Operand& dest, const Xbyak::Operand& src); From ab158fd4d763e1b51ccc32bde5ab19f4e7cb0a1d Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 7 Apr 2025 23:04:34 +0200 Subject: [PATCH 45/49] Don't reuse stack space for now --- .../backend/asm_x64/x64_emit_context.cpp | 23 +++---------------- .../backend/asm_x64/x64_emit_context.h | 2 -- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp index ee104fe89..7d8449f53 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp @@ -167,12 +167,7 @@ void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& inte ActiveIntervalList& active_intervals) { const auto get_operand = [&](IR::Inst* inst) -> Operand { size_t current_sp = inst_stack_space; - if (ctx.free_stack_slots.empty()) { - inst_stack_space += 8; - } else { - current_sp += ctx.free_stack_slots.back(); - ctx.free_stack_slots.pop_back(); - } + inst_stack_space += 8; switch (GetRegBytesOfType(IR::Value(inst))) { case 1: return byte[r11 + current_sp]; @@ -192,14 +187,12 @@ void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& inte [](const ActiveInstInterval& a, const ActiveInstInterval& b) { return a.end < b.end; }); if (spill_candidate == active_intervals.end() || spill_candidate->end <= interval.start) { inst_to_operands[interval.inst][interval.component] = get_operand(interval.inst); - ctx.active_spill_intervals.push_back(interval); } else { Operands& operands = inst_to_operands[spill_candidate->inst]; - Reg reg = operands[spill_candidate->component].Reg(); + OperandHolder op = operands[spill_candidate->component]; inst_to_operands[interval.inst][interval.component] = - reg.isXMM() ? reg : ResizeRegToType(reg, interval.inst); + op.IsXmm() ? 
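// Linear-scan spilling: the active interval ending last is the candidate;
// when it still overlaps the new interval it donates its register and moves
// to a stack slot, otherwise the new interval itself is placed in memory.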
op : ResizeRegToType(op.Reg(), interval.inst); operands[spill_candidate->component] = get_operand(spill_candidate->inst); - ctx.active_spill_intervals.push_back(*spill_candidate); *spill_candidate = interval; } } @@ -304,16 +297,6 @@ void EmitContext::AllocateRegisters() { ++it; } } - for (auto it = ctx.active_spill_intervals.begin(); - it != ctx.active_spill_intervals.end();) { - if (it->end < interval.start) { - const Address& addr = inst_to_operands[it->inst][it->component].Mem(); - ctx.free_stack_slots.push_back(addr.getDisp()); - it = ctx.active_spill_intervals.erase(it); - } else { - ++it; - } - } u8 num_components = GetNumComponentsOfType(interval.inst); bool is_floating = IsFloatingType(interval.inst); auto& operands = inst_to_operands[interval.inst]; diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h index ce9921233..ad0e1da6d 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h @@ -154,10 +154,8 @@ private: struct RegAllocContext { boost::container::static_vector free_gp_regs; boost::container::static_vector free_xmm_regs; - boost::container::small_vector free_stack_slots; ActiveIntervalList active_gp_intervals; ActiveIntervalList active_xmm_intervals; - ActiveIntervalList active_spill_intervals; }; using FlatInstList = boost::container::small_vector; From 8e938268f9ea9c0b19466771713f6aae4e639eb1 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 8 Apr 2025 00:15:08 +0200 Subject: [PATCH 46/49] Correctly initialize Address --- src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp index 7d8449f53..dfb7094c3 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp @@ -165,7 +165,7 @@ void EmitContext::Epilogue() { void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& interval, ActiveIntervalList& active_intervals) { - const auto get_operand = [&](IR::Inst* inst) -> Operand { + const auto get_operand = [&](IR::Inst* inst) -> Address { size_t current_sp = inst_stack_space; inst_stack_space += 8; switch (GetRegBytesOfType(IR::Value(inst))) { @@ -179,7 +179,7 @@ void EmitContext::SpillInst(RegAllocContext& ctx, const ActiveInstInterval& inte return qword[r11 + current_sp]; default: UNREACHABLE_MSG("Unsupported register size: {}", GetRegBytesOfType(inst)); - return {}; + return ptr[r11 + current_sp]; } }; auto spill_candidate = std::max_element( From 72efc1143371c2262086737e19aa9119c280d968 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 8 Apr 2025 00:47:31 +0200 Subject: [PATCH 47/49] Fix loop coverage --- .../backend/asm_x64/x64_emit_context.cpp | 45 ++++++++++++++----- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp index dfb7094c3..608faed70 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp @@ -202,27 +202,50 @@ void EmitContext::AdjustInstInterval(InstInterval& interval, const FlatInstList& size_t dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), inst)); interval.start = 
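// Live ranges are positions in the flattened instruction list: the interval
// starts at the defining instruction and is widened below over uses, phi
// copy points, and any enclosing loop the definition must survive.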
dist; interval.end = dist; + const auto enlarge_interval = [&](IR::Inst* inst) { + size_t position = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), inst)); + interval.start = std::min(interval.start, position); + interval.end = std::max(interval.end, position); + }; for (const auto& use : inst->Uses()) { + IR::Inst* target_inst = use.user; if (use.user->GetOpcode() == IR::Opcode::Phi) { // We assign the value at the end of the phi block - IR::Inst& last_inst = use.user->PhiBlock(use.operand)->back(); - dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), &last_inst)); - interval.start = std::min(interval.start, dist); - interval.end = std::max(interval.end, dist); - } else { - dist = std::distance(insts.begin(), std::find(insts.begin(), insts.end(), use.user)); - interval.end = std::max(interval.end, dist); + target_inst = &use.user->PhiBlock(use.operand)->back(); } + // If the user is in a loop and the instruction is not, we need to extend the interval + // to the end of the loop + u32 target_depth = inst->GetParent()->CondData().depth; + const auto* cond_data = &target_inst->GetParent()->CondData(); + const IR::AbstractSyntaxNode* target_loop = nullptr; + while (cond_data && cond_data->depth > target_depth) { + if (cond_data->asl_node->type == IR::AbstractSyntaxNode::Type::Loop) { + target_loop = cond_data->asl_node; + } + cond_data = cond_data->parent; + } + if (target_loop) { + IR::Block* cont_block = target_loop->data.loop.continue_block; + target_inst = &cont_block->back(); + ASSERT(target_inst->GetOpcode() == IR::Opcode::ConditionRef); + } + enlarge_interval(target_inst); } if (inst->GetOpcode() == IR::Opcode::Phi) { for (size_t i = 0; i < inst->NumArgs(); i++) { IR::Block* block = inst->PhiBlock(i); - dist = - std::distance(insts.begin(), std::find(insts.begin(), insts.end(), &block->back())); - interval.start = std::min(interval.start, dist); - interval.end = std::max(interval.end, dist); + enlarge_interval(&block->back()); phi_assignments[block].emplace_back(inst, inst->Arg(i)); } + // Extend to predecessors + // Phis in loop headers need to extend to the end of the loop + for (IR::Block* pred : inst->GetParent()->ImmPredecessors()) { + IR::Inst* last_inst = &pred->back(); + if (last_inst->GetOpcode() == IR::Opcode::ConditionRef) { + enlarge_interval(last_inst); + } + } + } } From 7ca985207ca426a3f1d33ffd2af7f35861828dbe Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 8 Apr 2025 13:14:30 +0200 Subject: [PATCH 48/49] Implement MinTri, MaxTri, MedTri --- .../asm_x64/emit_x64_floating_point.cpp | 28 +++ .../backend/asm_x64/emit_x64_instructions.h | 9 + .../backend/asm_x64/emit_x64_integer.cpp | 72 +++++++ .../ir/compute_value/do_float_operations.cpp | 21 ++ .../ir/compute_value/do_float_operations.h | 6 + .../compute_value/do_integer_operations.cpp | 36 ++++ .../ir/compute_value/do_integer_operations.h | 6 + .../ir/compute_value/imm_value.cpp | 196 ++++++++++++++++++ .../ir/compute_value/imm_value.h | 9 + 9 files changed, 383 insertions(+) diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp index 2a048dbcb..2630538fe 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_floating_point.cpp @@ -146,6 +146,34 @@ void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, co MovDouble(ctx, dest[0], tmp); } +void EmitFPMinTri32(EmitContext& ctx, 
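// The FP*Tri32 helpers chain scalar minss/maxss. Note these instructions
// are not NaN-symmetric: on an unordered compare the second (source)
// operand is returned, a behaviour assumed acceptable for these min3/max3/
// med3 lowerings.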
const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) { + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().minss(tmp, op2[0].Op()); + ctx.Code().minss(tmp, op3[0].Op()); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) { + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().maxss(tmp, op2[0].Op()); + ctx.Code().maxss(tmp, op3[0].Op()); + MovFloat(ctx, dest[0], tmp); +} + +void EmitFPMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) { + Xmm tmp = dest[0].IsMem() ? ctx.TempXmmReg() : dest[0].Xmm(); + Xmm tmp2 = ctx.TempXmmReg(); + MovFloat(ctx, tmp2, op1[0]); + ctx.Code().maxss(tmp2, op2[0].Op()); + ctx.Code().minss(tmp2, op3[0].Op()); + MovFloat(ctx, tmp, op1[0]); + ctx.Code().minss(tmp, op2[0].Op()); + ctx.Code().maxss(tmp, tmp2); + MovFloat(ctx, dest[0], tmp); +} + void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) { Xmm tmp1 = ctx.TempXmmReg(); Xmm tmp2 = ctx.TempXmmReg(); diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h index c85da6890..d4a1c961c 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h @@ -237,6 +237,9 @@ void EmitFPMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, co void EmitFPMax64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); void EmitFPMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, bool is_legacy = false); void EmitFPMin64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitFPMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); +void EmitFPMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); +void EmitFPMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); void EmitFPMul16(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); void EmitFPMul32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); void EmitFPMul64(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); @@ -362,6 +365,12 @@ void EmitSMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, con void EmitUMin32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); void EmitSMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); void EmitUMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2); +void EmitSMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); +void EmitUMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); +void EmitSMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); +void EmitUMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3); 
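// The integer med3 implementations below rely on the identity
//   med3(a, b, c) == max(min(a, b), min(max(a, b), c))
// (and its dual with min and max swapped), lowering to cmp/cmov chains.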
+void EmitSMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
+void EmitUMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3);
 void EmitSClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
 void EmitUClamp32(EmitContext& ctx, const Operands& dest, const Operands& op, const Operands& min, const Operands& max);
 void EmitSLessThan32(EmitContext& ctx, const Operands& dest, const Operands& lhs, const Operands& rhs);
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp
index 6251a174a..5c9d1c4e9 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_integer.cpp
@@ -377,6 +377,78 @@ void EmitUMax32(EmitContext& ctx, const Operands& dest, const Operands& op1, con
     MovGP(ctx, dest[0], tmp);
 }
 
+void EmitSMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
+    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().cmp(tmp, op2[0].Op());
+    ctx.Code().cmovg(tmp, op2[0].Op());
+    ctx.Code().cmp(tmp, op3[0].Op());
+    ctx.Code().cmovg(tmp, op3[0].Op());
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitUMinTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
+    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().cmp(tmp, op2[0].Op());
+    ctx.Code().cmova(tmp, op2[0].Op());
+    ctx.Code().cmp(tmp, op3[0].Op());
+    ctx.Code().cmova(tmp, op3[0].Op());
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitSMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
+    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().cmp(tmp, op2[0].Op());
+    ctx.Code().cmovl(tmp, op2[0].Op());
+    ctx.Code().cmp(tmp, op3[0].Op());
+    ctx.Code().cmovl(tmp, op3[0].Op());
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitUMaxTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
+    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().cmp(tmp, op2[0].Op());
+    ctx.Code().cmovb(tmp, op2[0].Op());
+    ctx.Code().cmp(tmp, op3[0].Op());
+    ctx.Code().cmovb(tmp, op3[0].Op());
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitSMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
+    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
+    Reg tmp2 = ctx.TempGPReg().cvt32();
+    MovGP(ctx, tmp2, op1[0]);
+    ctx.Code().cmp(tmp2, op2[0].Op());
+    ctx.Code().cmovl(tmp2, op2[0].Op());
+    ctx.Code().cmp(tmp2, op3[0].Op());
+    ctx.Code().cmovg(tmp2, op3[0].Op());
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().cmp(tmp, op2[0].Op());
+    ctx.Code().cmovg(tmp, op2[0].Op());
+    ctx.Code().cmp(tmp, tmp2);
+    ctx.Code().cmovl(tmp, tmp2);
+    MovGP(ctx, dest[0], tmp);
+}
+
+void EmitUMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
+    Reg tmp = dest[0].IsMem() ? 
+void EmitUMedTri32(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2, const Operands& op3) {
+    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
+    Reg tmp2 = ctx.TempGPReg().cvt32();
+    MovGP(ctx, tmp, op1[0]);
+    ctx.Code().cmp(tmp, op2[0].Op());
+    ctx.Code().cmova(tmp, op2[0].Op());
+    ctx.Code().cmp(tmp, op3[0].Op());
+    ctx.Code().cmovb(tmp, op3[0].Op());
+    MovGP(ctx, tmp2, op1[0]);
+    ctx.Code().cmp(tmp2, op2[0].Op());
+    ctx.Code().cmovb(tmp2, op2[0].Op());
+    ctx.Code().cmp(tmp2, tmp);
+    ctx.Code().cmova(tmp2, tmp);
+    MovGP(ctx, dest[0], tmp2);
+}
+
 void EmitSClamp32(EmitContext& ctx, const Operands& dest, const Operands& value, const Operands& min, const Operands& max) {
     Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt32() : dest[0].Reg();
     MovGP(ctx, tmp, value[0]);
diff --git a/src/shader_recompiler/ir/compute_value/do_float_operations.cpp b/src/shader_recompiler/ir/compute_value/do_float_operations.cpp
index dd4175eac..1071d252d 100644
--- a/src/shader_recompiler/ir/compute_value/do_float_operations.cpp
+++ b/src/shader_recompiler/ir/compute_value/do_float_operations.cpp
@@ -85,6 +85,27 @@ void DoFPMin64(ImmValueList& inst_values, const ImmValueList& args0, const ImmVa
                             std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
 }
 
+void DoFPMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
+                  const ImmValueList& args2) {
+    Common::CartesianInvoke(ImmValue::MaxTri<f32>,
+                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
+                            args2);
+}
+
+void DoFPMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
+                  const ImmValueList& args2) {
+    Common::CartesianInvoke(ImmValue::MinTri<f32>,
+                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
+                            args2);
+}
+
+void DoFPMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
+                  const ImmValueList& args2) {
+    Common::CartesianInvoke(ImmValue::MedTri<f32>,
+                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1,
+                            args2);
+}
+
 void DoFPMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1) {
     Common::CartesianInvoke(ImmValue::Mul<f32>,
                             std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
diff --git a/src/shader_recompiler/ir/compute_value/do_float_operations.h b/src/shader_recompiler/ir/compute_value/do_float_operations.h
index ffc8040d1..170201f33 100644
--- a/src/shader_recompiler/ir/compute_value/do_float_operations.h
+++ b/src/shader_recompiler/ir/compute_value/do_float_operations.h
@@ -22,6 +22,12 @@ void DoFPMax64(ImmValueList& inst_values, const ImmValueList& args0, const ImmVa
 void DoFPMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
               const ImmValueList& args_legacy);
 void DoFPMin64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
+void DoFPMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
+                  const ImmValueList& args2);
+void DoFPMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
+                  const ImmValueList& args2);
+void DoFPMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
+                  const ImmValueList& args2);
 void DoFPMul32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
 void DoFPMul64(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
 void DoFPDiv32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
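// ---------------------------------------------------------------------------
// [Editor's aside] The Do* folders evaluate an operation over every
// combination of candidate immediates. A minimal stand-in for the three-list
// case over std::vector; the project's Common::CartesianInvoke is assumed to
// generalize this to arbitrary arity and ImmValueList:
#include <vector>

template <typename F, typename T>
void CartesianInvoke3(F&& func, std::vector<T>& out, const std::vector<T>& a,
                      const std::vector<T>& b, const std::vector<T>& c) {
    for (const T& x : a)
        for (const T& y : b)
            for (const T& z : c)
                out.push_back(func(x, y, z)); // |out| grows by |a|*|b|*|c|
}
// ---------------------------------------------------------------------------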
diff --git a/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp b/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp
index dacdaae14..92ce82fa9 100644
--- a/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp
+++ b/src/shader_recompiler/ir/compute_value/do_integer_operations.cpp
@@ -219,6 +219,42 @@ void DoUMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmVal
                             std::insert_iterator(inst_values, inst_values.begin()), args0, args1);
 }
 
+void DoSMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
+                 const ImmValueList& args2) {
+    Common::CartesianInvoke(ImmValue::MinTri<s32>,
+                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2);
+}
+
+void DoUMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
+                 const ImmValueList& args2) {
+    Common::CartesianInvoke(ImmValue::MinTri<u32>,
+                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2);
+}
+
+void DoSMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
+                 const ImmValueList& args2) {
+    Common::CartesianInvoke(ImmValue::MaxTri<s32>,
+                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2);
+}
+
+void DoUMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
+                 const ImmValueList& args2) {
+    Common::CartesianInvoke(ImmValue::MaxTri<u32>,
+                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2);
+}
+
+void DoSMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
+                 const ImmValueList& args2) {
+    Common::CartesianInvoke(ImmValue::MedTri<s32>,
+                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2);
+}
+
+void DoUMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1,
+                 const ImmValueList& args2) {
+    Common::CartesianInvoke(ImmValue::MedTri<u32>,
+                            std::insert_iterator(inst_values, inst_values.begin()), args0, args1, args2);
+}
+
 void DoSClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min,
                 const ImmValueList& max) {
     Common::CartesianInvoke(ImmValue::Clamp<s32>,
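// ---------------------------------------------------------------------------
// [Editor's aside] Why the S*/U* folders stay separate even though both
// operate on 32-bit values: the same bit pattern orders differently under
// signed and unsigned comparison.
#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t bits = 0xFFFFFFFFu;      // -1 as s32, 2^32 - 1 as u32
    std::printf("%d\n", (int32_t)bits < 0); // 1: below zero when signed
    std::printf("%d\n", bits < 1u);         // 0: the largest value unsigned
    return 0;
}
// ---------------------------------------------------------------------------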
diff --git a/src/shader_recompiler/ir/compute_value/do_integer_operations.h b/src/shader_recompiler/ir/compute_value/do_integer_operations.h
index e698f2b12..aa06ee36b 100644
--- a/src/shader_recompiler/ir/compute_value/do_integer_operations.h
+++ b/src/shader_recompiler/ir/compute_value/do_integer_operations.h
@@ -62,6 +62,12 @@ void DoSMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmVal
 void DoUMin32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
 void DoSMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
 void DoUMax32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1);
+void DoSMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, const ImmValueList& args2);
+void DoUMinTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, const ImmValueList& args2);
+void DoSMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, const ImmValueList& args2);
+void DoUMaxTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, const ImmValueList& args2);
+void DoSMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, const ImmValueList& args2);
+void DoUMedTri32(ImmValueList& inst_values, const ImmValueList& args0, const ImmValueList& args1, const ImmValueList& args2);
 void DoSClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min,
                 const ImmValueList& max);
 void DoUClamp32(ImmValueList& inst_values, const ImmValueList& value, const ImmValueList& min,
diff --git a/src/shader_recompiler/ir/compute_value/imm_value.cpp b/src/shader_recompiler/ir/compute_value/imm_value.cpp
index 2000bdfba..86586a25a 100644
--- a/src/shader_recompiler/ir/compute_value/imm_value.cpp
+++ b/src/shader_recompiler/ir/compute_value/imm_value.cpp
@@ -1127,6 +1127,202 @@ ImmValue ImmValue::Max(const ImmValue& a, const ImmValue& b) no
     return ImmValue(std::max(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64));
 }
 
+template <>
+ImmValue ImmValue::MinTri<u8>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::min(std::min(a.imm_values[0].imm_u8, b.imm_values[0].imm_u8),
+                             c.imm_values[0].imm_u8));
+}
+
+template <>
+ImmValue ImmValue::MinTri<s8>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::min(std::min(a.imm_values[0].imm_s8, b.imm_values[0].imm_s8),
+                             c.imm_values[0].imm_s8));
+}
+
+template <>
+ImmValue ImmValue::MinTri<u16>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::min(std::min(a.imm_values[0].imm_u16, b.imm_values[0].imm_u16),
+                             c.imm_values[0].imm_u16));
+}
+
+template <>
+ImmValue ImmValue::MinTri<s16>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::min(std::min(a.imm_values[0].imm_s16, b.imm_values[0].imm_s16),
+                             c.imm_values[0].imm_s16));
+}
+
+template <>
+ImmValue ImmValue::MinTri<u32>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::min(std::min(a.imm_values[0].imm_u32, b.imm_values[0].imm_u32),
+                             c.imm_values[0].imm_u32));
+}
+
+template <>
+ImmValue ImmValue::MinTri<s32>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::min(std::min(a.imm_values[0].imm_s32, b.imm_values[0].imm_s32),
+                             c.imm_values[0].imm_s32));
+}
+
+template <>
+ImmValue ImmValue::MinTri<u64>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::min(std::min(a.imm_values[0].imm_u64, b.imm_values[0].imm_u64),
+                             c.imm_values[0].imm_u64));
+}
+
+template <>
+ImmValue ImmValue::MinTri<s64>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::min(std::min(a.imm_values[0].imm_s64, b.imm_values[0].imm_s64),
+                             c.imm_values[0].imm_s64));
+}
+
+template <>
+ImmValue ImmValue::MinTri<f32>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::min(std::min(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32),
+                             c.imm_values[0].imm_f32));
+}
+
+template <>
+ImmValue ImmValue::MinTri<f64>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::min(std::min(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64),
+                             c.imm_values[0].imm_f64));
+}
+
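// ---------------------------------------------------------------------------
// [Editor's aside] A subtlety of folding the float variants with std::min:
// std::min(a, b) yields its first argument whenever the pair is unordered,
// while the x64 backend's minss yields the second operand, so folded
// constants and JIT results can disagree in the presence of NaN.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
    const float nan = std::nanf("");
    std::printf("%f\n", std::min(1.0f, nan)); // prints 1.0; minss would yield nan
    return 0;
}
// ---------------------------------------------------------------------------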
+template <>
+ImmValue ImmValue::MaxTri<u8>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::max(std::max(a.imm_values[0].imm_u8, b.imm_values[0].imm_u8),
+                             c.imm_values[0].imm_u8));
+}
+
+template <>
+ImmValue ImmValue::MaxTri<s8>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::max(std::max(a.imm_values[0].imm_s8, b.imm_values[0].imm_s8),
+                             c.imm_values[0].imm_s8));
+}
+
+template <>
+ImmValue ImmValue::MaxTri<u16>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::max(std::max(a.imm_values[0].imm_u16, b.imm_values[0].imm_u16),
+                             c.imm_values[0].imm_u16));
+}
+
+template <>
+ImmValue ImmValue::MaxTri<s16>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::max(std::max(a.imm_values[0].imm_s16, b.imm_values[0].imm_s16),
+                             c.imm_values[0].imm_s16));
+}
+
+template <>
+ImmValue ImmValue::MaxTri<u32>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::max(std::max(a.imm_values[0].imm_u32, b.imm_values[0].imm_u32),
+                             c.imm_values[0].imm_u32));
+}
+
+template <>
+ImmValue ImmValue::MaxTri<s32>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::max(std::max(a.imm_values[0].imm_s32, b.imm_values[0].imm_s32),
+                             c.imm_values[0].imm_s32));
+}
+
+template <>
+ImmValue ImmValue::MaxTri<u64>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::max(std::max(a.imm_values[0].imm_u64, b.imm_values[0].imm_u64),
+                             c.imm_values[0].imm_u64));
+}
+
+template <>
+ImmValue ImmValue::MaxTri<s64>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::max(std::max(a.imm_values[0].imm_s64, b.imm_values[0].imm_s64),
+                             c.imm_values[0].imm_s64));
+}
+
+template <>
+ImmValue ImmValue::MaxTri<f32>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::max(std::max(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32),
+                             c.imm_values[0].imm_f32));
+}
+
+template <>
+ImmValue ImmValue::MaxTri<f64>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    return ImmValue(std::max(std::max(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64),
+                             c.imm_values[0].imm_f64));
+}
+
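// ---------------------------------------------------------------------------
// [Editor's aside] The MedTri specializations that follow rely on the
// identity med(a, b, c) == max(min(a, b), min(max(a, b), c)); a quick
// exhaustive check over all orderings of three distinct values:
#include <algorithm>
#include <cassert>

int main() {
    int v[3] = {1, 2, 3};
    do {
        const int med = std::max(std::min(v[0], v[1]),
                                 std::min(std::max(v[0], v[1]), v[2]));
        assert(med == 2); // 2 is the median of {1, 2, 3} in every order
    } while (std::next_permutation(v, v + 3));
    return 0;
}
// ---------------------------------------------------------------------------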
+template <>
+ImmValue ImmValue::MedTri<u8>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    u8 mmx = std::min(std::max(a.imm_values[0].imm_u8, b.imm_values[0].imm_u8),
+                      c.imm_values[0].imm_u8);
+    return ImmValue(std::max(std::min(a.imm_values[0].imm_u8, b.imm_values[0].imm_u8),
+                             mmx));
+}
+
+template <>
+ImmValue ImmValue::MedTri<s8>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    s8 mmx = std::min(std::max(a.imm_values[0].imm_s8, b.imm_values[0].imm_s8),
+                      c.imm_values[0].imm_s8);
+    return ImmValue(std::max(std::min(a.imm_values[0].imm_s8, b.imm_values[0].imm_s8),
+                             mmx));
+}
+
+template <>
+ImmValue ImmValue::MedTri<u16>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    u16 mmx = std::min(std::max(a.imm_values[0].imm_u16, b.imm_values[0].imm_u16),
+                       c.imm_values[0].imm_u16);
+    return ImmValue(std::max(std::min(a.imm_values[0].imm_u16, b.imm_values[0].imm_u16),
+                             mmx));
+}
+
+template <>
+ImmValue ImmValue::MedTri<s16>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    s16 mmx = std::min(std::max(a.imm_values[0].imm_s16, b.imm_values[0].imm_s16),
+                       c.imm_values[0].imm_s16);
+    return ImmValue(std::max(std::min(a.imm_values[0].imm_s16, b.imm_values[0].imm_s16),
+                             mmx));
+}
+
+template <>
+ImmValue ImmValue::MedTri<u32>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    u32 mmx = std::min(std::max(a.imm_values[0].imm_u32, b.imm_values[0].imm_u32),
+                       c.imm_values[0].imm_u32);
+    return ImmValue(std::max(std::min(a.imm_values[0].imm_u32, b.imm_values[0].imm_u32),
+                             mmx));
+}
+
+template <>
+ImmValue ImmValue::MedTri<s32>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    s32 mmx = std::min(std::max(a.imm_values[0].imm_s32, b.imm_values[0].imm_s32),
+                       c.imm_values[0].imm_s32);
+    return ImmValue(std::max(std::min(a.imm_values[0].imm_s32, b.imm_values[0].imm_s32),
+                             mmx));
+}
+
+template <>
+ImmValue ImmValue::MedTri<u64>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    u64 mmx = std::min(std::max(a.imm_values[0].imm_u64, b.imm_values[0].imm_u64),
+                       c.imm_values[0].imm_u64);
+    return ImmValue(std::max(std::min(a.imm_values[0].imm_u64, b.imm_values[0].imm_u64),
+                             mmx));
+}
+
+template <>
+ImmValue ImmValue::MedTri<s64>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    s64 mmx = std::min(std::max(a.imm_values[0].imm_s64, b.imm_values[0].imm_s64),
+                       c.imm_values[0].imm_s64);
+    return ImmValue(std::max(std::min(a.imm_values[0].imm_s64, b.imm_values[0].imm_s64),
+                             mmx));
+}
+
+template <>
+ImmValue ImmValue::MedTri<f32>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    f32 mmx = std::min(std::max(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32),
+                       c.imm_values[0].imm_f32);
+    return ImmValue(std::max(std::min(a.imm_values[0].imm_f32, b.imm_values[0].imm_f32),
+                             mmx));
+}
+
+template <>
+ImmValue ImmValue::MedTri<f64>(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept {
+    f64 mmx = std::min(std::max(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64),
+                       c.imm_values[0].imm_f64);
+    return ImmValue(std::max(std::min(a.imm_values[0].imm_f64, b.imm_values[0].imm_f64),
+                             mmx));
+}
+
 template <>
 ImmValue ImmValue::Clamp<u8>(const ImmValue& in, const ImmValue& min, const ImmValue& max) noexcept {
diff --git a/src/shader_recompiler/ir/compute_value/imm_value.h b/src/shader_recompiler/ir/compute_value/imm_value.h
index 800ee4b16..513e36db0 100644
--- a/src/shader_recompiler/ir/compute_value/imm_value.h
+++ b/src/shader_recompiler/ir/compute_value/imm_value.h
@@ -166,6 +166,15 @@ public:
     template <typename T>
     [[nodiscard]] static ImmValue Max(const ImmValue& a, const ImmValue& b) noexcept;
 
+    template <typename T>
+    [[nodiscard]] static ImmValue MinTri(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept;
+
+    template <typename T>
+    [[nodiscard]] static ImmValue MaxTri(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept;
+
+    template <typename T>
+    [[nodiscard]] static ImmValue MedTri(const ImmValue& a, const ImmValue& b, const ImmValue& c) noexcept;
+
     template <typename T>
     [[nodiscard]] static ImmValue Clamp(const ImmValue& in, const ImmValue& min,
                                         const ImmValue& max) noexcept;

From 6f6652a46ae81d013235a1a1d99ef218d0523ccc Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Thu, 10 Apr 2025 01:35:24 +0200
Subject: [PATCH 49/49] General fixes

---
 .../asm_x64/emit_x64_context_get_set.cpp      | 52 +++++++++++++------
 .../backend/asm_x64/emit_x64_instructions.h   |  6 +--
 .../backend/asm_x64/emit_x64_logical.cpp      | 18 ++++---
 .../backend/asm_x64/x64_emit_context.cpp      |  6 ++-
 .../backend/asm_x64/x64_utils.cpp             | 12 +++--
 .../frontend/translate/scalar_memory.cpp      |  2 +-
 src/shader_recompiler/info.h                  |  3 +-
 .../ir/passes/resource_tracking_pass.cpp      |  3 +-
 src/shader_recompiler/ir/passes/srt.h         |  2 +-
 9 files changed, 67 insertions(+), 37 deletions(-)

diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
index f097d68ae..8d40a973b 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
@@ -12,16 +12,15 @@ using namespace Xbyak::util;
 
 void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg) {
     const u32 offset = static_cast<u32>(reg) << 2;
-    Reg& tmp = ctx.TempGPReg();
+    Reg tmp = ctx.TempGPReg();
     ctx.Code().lea(tmp, ptr[ctx.UserData() + offset]);
     MovGP(
         ctx, dest[0], dword[tmp]);
 }
 
 void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) {
-    Reg& tmp = ctx.TempGPReg();
+    Reg tmp = ctx.TempGPReg();
     MovGP(ctx, tmp, offset[0]);
-    ctx.Code().shl(tmp, 2);
-    ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]);
+    ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp * 4]);
     MovGP(ctx, dword[tmp], value[0]);
 }
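// ---------------------------------------------------------------------------
// [Editor's aside] The folded lea above computes base + index*4 in a single
// scaled-index address instead of a separate shl. Reference for the
// user-data addressing, treating user data as an array of dwords:
#include <cstdint>

static uint32_t ReadUserDataRef(const uint32_t* user_data, uint32_t index) {
    // lea tmp, [user_data + index*4]; mov eax, dword [tmp]
    return user_data[index];
}
// ---------------------------------------------------------------------------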
@@ -58,32 +57,53 @@ void EmitGetGotoVariable(EmitContext&) {
 }
 
 void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset) {
-    Reg& tmp = ctx.TempGPReg();
+    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg().changeBit(64);
+    Reg off_tmp = offset[0].IsMem() ? ctx.TempGPReg() : offset[0].Reg().changeBit(64);
     MovGP(ctx, tmp, base[1]);
+    MovGP(ctx, off_tmp, offset[0]);
     ctx.Code().shl(tmp, 32);
     ctx.Code().or_(tmp, base[0].Op());
-    if (offset[0].IsMem()) {
-        ctx.Code().add(tmp, offset[0].Mem());
-    } else {
-        ctx.Code().lea(tmp, ptr[tmp + offset[0].Reg().cvt64()]);
-    }
+    ctx.Code().lea(tmp, ptr[tmp + off_tmp * 4]);
     MovGP(ctx, dest[0], dword[tmp]);
 }
 
-void EmitReadConstBuffer(EmitContext& ctx) {
-    throw NotImplementedException("ReadConstBuffer");
+void EmitReadConstBuffer(EmitContext& ctx, const Operands& dest, const Operands& handle, const Operands& offset) {
+    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg().changeBit(64);
+    // Reconstruct base address
+    Reg off_tmp = ctx.TempGPReg();
+    MovGP(ctx, tmp, handle[1]);
+    ctx.Code().and_(tmp, 0xFFF);
+    ctx.Code().shl(tmp, 32);
+    MovGP(ctx, off_tmp.cvt32(), handle[0]);
+    ctx.Code().and_(off_tmp.cvt32(), 0xFFFFFFFF);
+    ctx.Code().or_(tmp, off_tmp);
+    // TODO: we should correctly clamp the offset
+    MovGP(ctx, off_tmp, offset[0]);
+    ctx.Code().lea(tmp, ptr[tmp + off_tmp * 4]);
+    MovGP(ctx, dest[0], dword[tmp]);
+
 }
 
 void EmitReadStepRate(EmitContext& ctx) {
     throw NotImplementedException("ReadStepRate");
 }
 
-void EmitGetAttribute(EmitContext& ctx) {
-    throw NotImplementedException("GetAttribute");
+void EmitGetAttribute(EmitContext& ctx, const Operands& dest) {
+    LOG_WARNING(Render_Recompiler, "GetAttribute stubbed, setting to 0.0");
+    if (dest[0].IsMem()) {
+        ctx.Code().mov(dest[0].Mem(), 0);
+    } else {
+        ctx.Code().pxor(dest[0].Xmm(), dest[0].Xmm());
+    }
 }
 
-void EmitGetAttributeU32(EmitContext& ctx) {
-    throw NotImplementedException("GetAttributeU32");
+void EmitGetAttributeU32(EmitContext& ctx, const Operands& dest) {
+    LOG_WARNING(Render_Recompiler, "GetAttributeU32 stubbed, setting to 0");
+    if (dest[0].IsMem()) {
+        ctx.Code().mov(dest[0].Mem(), 0);
+    } else {
+        ctx.Code().xor_(dest[0].Reg(), dest[0].Reg());
+    }
 }
 
 void EmitSetAttribute(EmitContext& ctx) {
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
index d4a1c961c..4c109d1cf 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
@@ -61,7 +61,7 @@ void EmitSetGotoVariable(EmitContext& ctx);
 void EmitGetGotoVariable(EmitContext& ctx);
 void EmitSetScc(EmitContext& ctx);
 void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset);
-void EmitReadConstBuffer(EmitContext& ctx);
+void EmitReadConstBuffer(EmitContext& ctx, const Operands& dest, const Operands& handle, const Operands& offset);
 void EmitLoadBufferU8(EmitContext& ctx);
 void EmitLoadBufferU16(EmitContext& ctx);
 void EmitLoadBufferU32(EmitContext& ctx);
@@ -95,8 +95,8 @@ void EmitBufferAtomicAnd32(EmitContext& ctx);
 void EmitBufferAtomicOr32(EmitContext& ctx);
 void EmitBufferAtomicXor32(EmitContext& ctx);
 void EmitBufferAtomicSwap32(EmitContext& ctx);
-void EmitGetAttribute(EmitContext& ctx);
-void EmitGetAttributeU32(EmitContext& ctx);
+void EmitGetAttribute(EmitContext& ctx, const Operands& dest);
+void EmitGetAttributeU32(EmitContext& ctx, const Operands& dest);
 void EmitSetAttribute(EmitContext& ctx);
 void EmitGetTessGenericAttribute(EmitContext& ctx);
 void EmitSetTcsGenericAttribute(EmitContext& ctx);
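// ---------------------------------------------------------------------------
// [Editor's aside] EmitReadConstBuffer above rebuilds the buffer base from
// the two handle dwords (the low dword plus the low 12 bits of the high
// dword, per the and/shl/or sequence) before indexing dwords. The same
// arithmetic in plain C++:
#include <cstdint>

static uint64_t BufferBaseRef(uint32_t handle_lo, uint32_t handle_hi) {
    return (uint64_t(handle_hi & 0xFFFu) << 32) | handle_lo;
}
// ---------------------------------------------------------------------------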
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
index d1d7cfb74..a7714e91b 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
@@ -10,29 +10,33 @@ using namespace Xbyak;
 using namespace Xbyak::util;
 
 void EmitLogicalOr(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
-    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0].Reg().cvt8();
+    OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0];
     MovGP(ctx, tmp, op1[0]);
-    ctx.Code().or_(tmp, op2[0].Op());
+    ctx.Code().or_(tmp.Op(), op2[0].Op());
+    ctx.Code().and_(tmp.Op(), 1);
     MovGP(ctx, dest[0], tmp);
 }
 
 void EmitLogicalAnd(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
-    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0].Reg().cvt8();
-    MovGP(ctx, tmp, op1[0]);
-    ctx.Code().and_(tmp, op2[0].Op());
+    OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0];
+    MovGP(ctx, tmp.Op(), op1[0]);
+    ctx.Code().and_(tmp.Op(), op2[0].Op());
+    ctx.Code().and_(tmp.Op(), 1);
     MovGP(ctx, dest[0], tmp);
 }
 
 void EmitLogicalXor(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
-    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0].Reg().cvt8();
+    OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0];
     MovGP(ctx, tmp, op1[0]);
-    ctx.Code().xor_(tmp, op2[0].Op());
+    ctx.Code().xor_(tmp.Op(), op2[0].Op());
+    ctx.Code().and_(tmp.Op(), 1);
     MovGP(ctx, dest[0], tmp);
 }
 
 void EmitLogicalNot(EmitContext& ctx, const Operands& dest, const Operands& op) {
     MovGP(ctx, dest[0], op[0]);
     ctx.Code().not_(dest[0].Op());
+    ctx.Code().and_(dest[0].Op(), 1);
 }
 
 } // namespace Shader::Backend::X64
\ No newline at end of file
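// ---------------------------------------------------------------------------
// [Editor's aside] The new `and 1` after each logical op keeps IR booleans
// in a canonical 0/1 encoding even when the byte register carries stale
// bits, e.g. after a bitwise not:
#include <cstdint>

static uint8_t LogicalNotRef(uint8_t b) {
    return uint8_t(~b) & 1u; // not_ followed by and_ 1, as emitted above
}
// ---------------------------------------------------------------------------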
diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
index 608faed70..a37a697e4 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
+++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
@@ -27,7 +27,8 @@ Reg64& EmitContext::TempGPReg(bool reserve) {
     if (idx > num_scratch_gp_regs &&
         std::ranges::find(preserved_regs, reg) == preserved_regs.end()) {
         preserved_regs.push_back(reg);
-        code.push(reg);
+        code.sub(rsp, 8);
+        code.mov(ptr[rsp], reg);
     }
     return reg;
 }
@@ -154,7 +155,8 @@ void EmitContext::Epilogue() {
             code.movups(reg.cvt128(), ptr[rsp]);
             code.add(rsp, 16);
         } else {
-            code.pop(reg);
+            code.mov(reg, ptr[rsp]);
+            code.add(rsp, 8);
         }
     }
     preserved_regs.clear();
diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
index aedd12547..edbcb89c3 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
+++ b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
@@ -157,13 +157,15 @@ void MovGP(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src)
     const u32 dst_bit = dst.Op().getBit();
     OperandHolder tmp = is_mem2mem ? ctx.TempGPReg(false).changeBit(dst_bit) : dst;
     if (src_bit < dst_bit) {
-        if (!dst.IsMem() && !src.Op().isBit(32)) {
+        if (!tmp.IsMem() && !src.Op().isBit(32)) {
             c.movzx(tmp.Reg(), src.Op());
+        } else if (tmp.IsMem()) {
+            Address addr = tmp.Mem();
+            c.mov(addr, 0);
+            addr.setBit(src_bit);
+            c.mov(addr, src.Reg());
         } else {
-            if (dst.IsMem()) {
-                c.mov(tmp.Op(), 0);
-            }
-            c.mov(tmp.Op(), src.Op());
+            c.mov(tmp.Reg().cvt32(), src.Op());
         }
     } else if (src_bit > dst_bit) {
         OperandHolder src_tmp = src;
diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
index c2e91b328..47240df27 100644
--- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
@@ -46,7 +46,7 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
         if (smrd.offset == SQ_SRC_LITERAL) {
             return ir.Imm32(inst.src[1].code);
         }
-        return ir.GetScalarReg(IR::ScalarReg(smrd.offset));
+        return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
     }();
     const IR::ScalarReg sbase{inst.src[0].code * 2};
     const IR::Value base =
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index 8dcf9c5c4..6d57b6252 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -255,8 +255,9 @@ struct Info {
         std::memcpy(flattened_ud_buf.data(), user_data.data(), user_data.size_bytes());
         // Run the JIT program to walk the SRT and write the leaves to a flat buffer
         if (srt_info.walker_func) {
-            srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
+            srt_info.walker_func(flattened_ud_buf.data());
         }
+
     }
 
     void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) const {
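// ---------------------------------------------------------------------------
// [Editor's aside] With the walker signature change above, callers first
// memcpy user_data into the flat buffer and the walker then expands it in
// place. A stand-in walker (the real one is JIT-emitted) illustrating the
// calling convention:
#include <cstdint>
#include <cstring>
#include <vector>

using WalkerRef = void (*)(uint32_t* flat_dst);

static void DemoWalker(uint32_t* flat) {
    flat[1] = flat[0] + 1; // pretend leaf derived from the copied user data
}

int main() {
    const std::vector<uint32_t> user_data{7u};
    std::vector<uint32_t> flat(2, 0u);
    std::memcpy(flat.data(), user_data.data(), sizeof(uint32_t));
    WalkerRef walker = DemoWalker;
    walker(flat.data()); // flat == {7, 8}
    return 0;
}
// ---------------------------------------------------------------------------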
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index b9640fafc..e0910d60a 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -244,7 +244,8 @@ SharpLocation TrackSharp(const IR::Inst* inst, const Shader::Info& info) {
         }
         return std::nullopt;
     };
-    // We are not accounting for modifications to after the source.
+    // Value may be modified between the ReadConst/GetUserData and inst.
+    // We don't take this into account.
     const auto result = IR::BreadthFirstSearch(inst, pred);
     ASSERT_MSG(result, "Unable to track sharp source");
     inst = result.value();
diff --git a/src/shader_recompiler/ir/passes/srt.h b/src/shader_recompiler/ir/passes/srt.h
index 0ddc15ea6..7d01a2895 100644
--- a/src/shader_recompiler/ir/passes/srt.h
+++ b/src/shader_recompiler/ir/passes/srt.h
@@ -9,7 +9,7 @@
 
 namespace Shader {
 
-using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/);
+using PFN_SrtWalker = void PS4_SYSV_ABI (*)(u32* /*flat_dst*/);
 
 struct PersistentSrtInfo {
     // Special case when fetch shader uses step rates.
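// ---------------------------------------------------------------------------
// [Editor's aside] Tying the scalar_memory.cpp hunk above to the EmitReadConst
// change earlier in this patch: the SGPR-sourced SMRD offset is a byte
// offset, and the ReadConst path now scales dword indices by 4, hence the
// new logical shift right by 2 on the frontend side:
#include <cstdint>

static uint32_t DwordIndexRef(uint32_t byte_offset) {
    return byte_offset >> 2; // bytes -> dword index, re-scaled by lea ... * 4
}
// ---------------------------------------------------------------------------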