shadPS4/src/shader_recompiler/frontend/translate/scalar_memory.cpp
TheTurtle a7c9bfa5c5
shader_recompiler: Small instruction parsing refactor/bugfixes (#340)
* translator: Implement f32 to f16 convert

* shader_recompiler: Add bit instructions

* shader_recompiler: More data share instructions

* shader_recompiler: Remove exec contexts, fix S_MOV_B64

* shader_recompiler: Split instruction parsing into categories

* shader_recompiler: Better BFS search

* shader_recompiler: Constant propagation pass for cmp_class_f32

* shader_recompiler: Partial readfirstlane implementation

* shader_recompiler: Stub readlane/writelane only for non-compute

* hack: Fix swizzle on RDR

* Will properly fix this when merging this

* clang format

* address_space: Bump user area size to full

* shader_recompiler: V_INTERP_MOV_F32

* Should work the same as spirv will emit flat decoration on demand

* kernel: Add MAP_OP_MAP_FLEXIBLE

* image_view: Attempt to apply storage swizzle on format

* vk_scheduler: Barrier attachments on renderpass end

* clang format

* liverpool: cs state backup

* shader_recompiler: More instructions and formats

* vector_alu: Proper V_MBCNT_U32_B32

* shader_recompiler: Port some dark souls things

* file_system: Implement sceKernelRename

* more formats

* clang format

* resource_tracking_pass: Back to assert

* translate: Tracedata

* kernel: Remove tracy lock

* Solves random crashes in Dark Souls

* code: Review comments
2024-07-30 23:32:40 +02:00

76 lines
2.6 KiB
C++

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h"
namespace Shader::Gcn {
// Value of the SMRD offset field that the loaders below compare against when the
// imm flag is clear: on a match the offset is taken from inst.src[1].code instead
// of from the offset field itself.
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
/// Routes a scalar memory instruction to the matching load translator.
///
/// The S_LOAD_DWORDXn opcodes go to S_LOAD_DWORD and the S_BUFFER_LOAD_DWORD[Xn]
/// opcodes go to S_BUFFER_LOAD_DWORD, each with the dword count for that opcode.
/// Any other opcode is reported through LogMissingOpcode.
///
/// @param inst Decoded instruction to translate.
void Translator::EmitScalarMemory(const GcnInst& inst) {
    switch (inst.opcode) {
    // Loads handled by S_LOAD_DWORD.
    case Opcode::S_LOAD_DWORDX4:
        S_LOAD_DWORD(4, inst);
        break;
    case Opcode::S_LOAD_DWORDX8:
        S_LOAD_DWORD(8, inst);
        break;
    case Opcode::S_LOAD_DWORDX16:
        S_LOAD_DWORD(16, inst);
        break;

    // Loads handled by S_BUFFER_LOAD_DWORD.
    case Opcode::S_BUFFER_LOAD_DWORD:
        S_BUFFER_LOAD_DWORD(1, inst);
        break;
    case Opcode::S_BUFFER_LOAD_DWORDX2:
        S_BUFFER_LOAD_DWORD(2, inst);
        break;
    case Opcode::S_BUFFER_LOAD_DWORDX4:
        S_BUFFER_LOAD_DWORD(4, inst);
        break;
    case Opcode::S_BUFFER_LOAD_DWORDX8:
        S_BUFFER_LOAD_DWORD(8, inst);
        break;
    case Opcode::S_BUFFER_LOAD_DWORDX16:
        S_BUFFER_LOAD_DWORD(16, inst);
        break;

    default:
        LogMissingOpcode(inst);
        break;
    }
}
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
const u32 dword_offset = [&] -> u32 {
if (smrd.imm) {
return smrd.offset;
}
if (smrd.offset == SQ_SRC_LITERAL) {
return inst.src[1].code;
}
UNREACHABLE();
}();
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::Value base =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
}
}
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::U32 dword_offset = [&] -> IR::U32 {
if (smrd.imm) {
return ir.Imm32(smrd.offset);
}
if (smrd.offset == SQ_SRC_LITERAL) {
return ir.Imm32(inst.src[1].code);
}
return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
}();
const IR::Value vsharp =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(vsharp, index));
}
}
} // namespace Shader::Gcn