From 597630078802af6c67ca684a72b2d40593df8346 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 23 Nov 2024 10:14:19 +0100 Subject: [PATCH 1/2] hot-fix: proper offset calculation for single offset lds instructions --- .../frontend/translate/data_share.cpp | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index adc2bbbc2..9685ee352 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -86,7 +86,8 @@ void Translator::V_WRITELANE_B32(const GcnInst& inst) { void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) { const IR::U32 addr{GetSrc(inst.src[0])}; const IR::U32 data{GetSrc(inst.src[1])}; - const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 offset = + ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data); if (rtn) { @@ -97,7 +98,8 @@ void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) { void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) { const IR::U32 addr{GetSrc(inst.src[0])}; const IR::U32 data{GetSrc(inst.src[1])}; - const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 offset = + ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, is_signed); if (rtn) { @@ -108,7 +110,8 @@ void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) { void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) { const IR::U32 addr{GetSrc(inst.src[0])}; const IR::U32 data{GetSrc(inst.src[1])}; - const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0)); + const IR::U32 offset = + ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, is_signed); if (rtn) { @@ -140,12 +143,14 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid addr1); } } else if (bit_size == 64) { - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + const IR::U32 addr0 = ir.IAdd( + addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0))); const IR::Value data = ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)); ir.WriteShared(bit_size, data, addr0); } else { - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + const IR::U32 addr0 = ir.IAdd( + addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0))); ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); } } @@ -187,26 +192,28 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)}); } } else if (bit_size == 64) { - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + const IR::U32 addr0 = ir.IAdd( + addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0))); const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0); ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)}); ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)}); } else { - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); + const IR::U32 addr0 = ir.IAdd( + addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0))); const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)}; ir.SetVectorReg(dst_reg, data); } } void Translator::DS_APPEND(const GcnInst& inst) { - const u32 inst_offset = inst.control.ds.offset0; + const u32 inst_offset = (u32(inst.control.ds.offset1) << 8u) + inst.control.ds.offset0; const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset)); const IR::U32 prev = ir.DataAppend(gds_offset); SetDst(inst.dst[0], prev); } void Translator::DS_CONSUME(const GcnInst& inst) { - const u32 inst_offset = inst.control.ds.offset0; + const u32 inst_offset = (u32(inst.control.ds.offset1) << 8u) + inst.control.ds.offset0; const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset)); const IR::U32 prev = ir.DataConsume(gds_offset); SetDst(inst.dst[0], prev); From d7d28aa8da6b70d5f6641e52f1b087448ca27d4b Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Sat, 23 Nov 2024 11:46:31 +0100 Subject: [PATCH 2/2] video_core: restored presenter aspect calculations (#1583) * video_core: restored presenter aspect calculations * code simplification --- .../renderer_vulkan/vk_presenter.cpp | 43 +++++++++++-------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_presenter.cpp b/src/video_core/renderer_vulkan/vk_presenter.cpp index d4972cbad..23d2981f5 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.cpp +++ b/src/video_core/renderer_vulkan/vk_presenter.cpp @@ -75,13 +75,13 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice physical_device, vk::Format for return MakeImageBlit(frame_width, frame_height, swapchain_width, swapchain_height, 0, 0); } -[[nodiscard]] vk::ImageBlit MakeImageBlitFit(s32 frame_width, s32 frame_height, s32 swapchain_width, - s32 swapchain_height) { +static vk::Rect2D FitImage(s32 frame_width, s32 frame_height, s32 swapchain_width, + s32 swapchain_height) { float frame_aspect = static_cast(frame_width) / frame_height; float swapchain_aspect = static_cast(swapchain_width) / swapchain_height; - s32 dst_width = swapchain_width; - s32 dst_height = swapchain_height; + u32 dst_width = swapchain_width; + u32 dst_height = swapchain_height; if (frame_aspect > swapchain_aspect) { dst_height = static_cast(swapchain_width / frame_aspect); @@ -89,10 +89,18 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice physical_device, vk::Format for dst_width = static_cast(swapchain_height * frame_aspect); } - s32 offset_x = (swapchain_width - dst_width) / 2; - s32 offset_y = (swapchain_height - dst_height) / 2; + const s32 offset_x = (swapchain_width - dst_width) / 2; + const s32 offset_y = (swapchain_height - dst_height) / 2; - return MakeImageBlit(frame_width, frame_height, dst_width, dst_height, offset_x, offset_y); + return vk::Rect2D{{offset_x, offset_y}, {dst_width, dst_height}}; +} + +[[nodiscard]] vk::ImageBlit MakeImageBlitFit(s32 frame_width, s32 frame_height, s32 swapchain_width, + s32 swapchain_height) { + const auto& dst_rect = FitImage(frame_width, frame_height, swapchain_width, swapchain_height); + + return MakeImageBlit(frame_width, frame_height, dst_rect.extent.width, dst_rect.extent.height, + dst_rect.offset.x, dst_rect.offset.y); } static vk::Format FormatToUnorm(vk::Format fmt) { @@ -552,25 +560,22 @@ Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, *pp_pipeline); + const auto& dst_rect = + FitImage(image.info.size.width, image.info.size.height, frame->width, frame->height); + const std::array viewports = { vk::Viewport{ - .x = 0.0f, - .y = 0.0f, - .width = 1.0f * frame->width, - .height = 1.0f * frame->height, + .x = 1.0f * dst_rect.offset.x, + .y = 1.0f * dst_rect.offset.y, + .width = 1.0f * dst_rect.extent.width, + .height = 1.0f * dst_rect.extent.height, .minDepth = 0.0f, .maxDepth = 1.0f, }, }; - const std::array scissors = { - vk::Rect2D{ - .offset = {0, 0}, - .extent = {frame->width, frame->height}, - }, - }; cmdbuf.setViewport(0, viewports); - cmdbuf.setScissor(0, scissors); + cmdbuf.setScissor(0, {dst_rect}); cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pp_pipeline_layout, 0, set_writes); @@ -580,7 +585,7 @@ Frame* Presenter::PrepareFrameInternal(VideoCore::ImageId image_id, bool is_eop) const std::array attachments = {vk::RenderingAttachmentInfo{ .imageView = frame->image_view, .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eDontCare, + .loadOp = vk::AttachmentLoadOp::eClear, .storeOp = vk::AttachmentStoreOp::eStore, }};