Mirror of https://github.com/shadps4-emu/shadPS4.git (synced 2025-07-22 18:15:14 +00:00)
Commit 9bece01cc7: Merge branch 'main' into microphone
@@ -1,14 +1,28 @@
 # SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 # SPDX-License-Identifier: GPL-2.0-or-later
 
-file(GLOB QT_KITS LIST_DIRECTORIES true "C:/Qt/*/msvc*_64")
-list(SORT QT_KITS COMPARE NATURAL)
-list(REVERSE QT_KITS)
-if(QT_KITS)
-    list(GET QT_KITS 0 QT_PREFIX)
-    set(CMAKE_PREFIX_PATH "${QT_PREFIX}" CACHE PATH "Qt prefix auto‑detected" FORCE)
-    message(STATUS "Auto-detected Qt prefix: ${QT_PREFIX}")
-else()
-    message(STATUS "findQt.cmake: no Qt‑Directory found in C:/Qt – please set CMAKE_PREFIX_PATH manually")
-endif()
+set(highest_version "0")
+set(CANDIDATE_DRIVES A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)
+
+foreach(drive ${CANDIDATE_DRIVES})
+    file(GLOB kits LIST_DIRECTORIES true CONFIGURE_DEPENDS "${drive}:/Qt/*/msvc*_64")
+    foreach(kit IN LISTS kits)
+        get_filename_component(version_dir "${kit}" DIRECTORY)
+        get_filename_component(kit_version "${version_dir}" NAME)
+        message(STATUS "DetectQtInstallation.cmake: Detected Qt: ${kit}")
+        if(kit_version VERSION_GREATER highest_version)
+            set(highest_version "${kit_version}")
+            set(QT_PREFIX "${kit}")
+        endif()
+    endforeach()
+endforeach()
+
+if(QT_PREFIX)
+    set(CMAKE_PREFIX_PATH "${QT_PREFIX}" CACHE PATH "Qt prefix auto‑detected" FORCE)
+    message(STATUS "DetectQtInstallation.cmake: Choose newest Qt: ${QT_PREFIX}")
+else()
+    message(STATUS "DetectQtInstallation.cmake: No Qt‑Directory found in <drive>:/Qt – please set CMAKE_PREFIX_PATH manually")
+endif()
@@ -163,7 +163,9 @@ static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operan
         mask = (1ULL << length) - 1;
     }
 
-    ASSERT_MSG(length + index <= 64, "length + index must be less than or equal to 64.");
+    if (length + index > 64) {
+        mask = 0xFFFF'FFFF'FFFF'FFFF;
+    }
 
     // Get lower qword from xmm register
     c.vmovq(scratch1, xmm_dst);
@@ -177,8 +179,8 @@ static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operan
     c.mov(scratch2, mask);
     c.and_(scratch1, scratch2);
 
-    // Writeback to xmm register, extrq instruction says top 64-bits are undefined so we don't
-    // care to preserve them
+    // Writeback to xmm register, extrq instruction says top 64-bits are undefined but zeroed on
+    // AMD CPUs
     c.vmovq(xmm_dst, scratch1);
 
     c.pop(scratch2);
@@ -287,7 +289,9 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper
         mask_value = (1ULL << length) - 1;
     }
 
-    ASSERT_MSG(length + index <= 64, "length + index must be less than or equal to 64.");
+    if (length + index > 64) {
+        mask_value = 0xFFFF'FFFF'FFFF'FFFF;
+    }
 
     c.vmovq(scratch1, xmm_src);
     c.vmovq(scratch2, xmm_dst);
@@ -307,8 +311,9 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper
     // dst |= src
     c.or_(scratch2, scratch1);
 
-    // Insert scratch2 into low 64 bits of dst, upper 64 bits are unaffected
-    c.vpinsrq(xmm_dst, xmm_dst, scratch2, 0);
+    // Insert scratch2 into low 64 bits of dst, upper 64 bits are undefined but zeroed on AMD
+    // CPUs
+    c.vmovq(xmm_dst, scratch2);
 
     c.pop(mask);
     c.pop(scratch2);
@@ -374,7 +379,7 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper
     c.and_(scratch2, mask);
     c.or_(scratch2, scratch1);
 
-    // Upper 64 bits are undefined in insertq
+    // Upper 64 bits are undefined in insertq but AMD CPUs zero them
    c.vmovq(xmm_dst, scratch2);
 
     c.pop(mask);
@@ -635,6 +640,7 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
         lowQWordDst >>= index;
         lowQWordDst &= mask;
 
+        memset((u8*)dst + sizeof(u64), 0, sizeof(u64));
         memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
 
         Common::IncrementRip(ctx, 4);
@@ -675,6 +681,7 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
         lowQWordDst &= ~(mask << index);
         lowQWordDst |= lowQWordSrc << index;
 
+        memset((u8*)dst + sizeof(u64), 0, sizeof(u64));
        memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
 
         Common::IncrementRip(ctx, 4);
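To make the patched interpreter path above easier to follow, here is a minimal stand-alone sketch of the EXTRQ bit-field extraction as these hunks perform it: the mask is clamped to all ones when length + index exceeds 64 instead of asserting, and the upper qword of the destination is zeroed to match AMD hardware behaviour. This is illustrative only (plain integers, a hypothetical ExtrqModel helper, and an assumption of 0 < length < 64 for brevity), not the emulator's actual handler.

#include <cstdint>
#include <cstring>

// Illustrative model of the patched EXTRQ software fallback (not shadPS4's real code).
static void ExtrqModel(uint8_t* dst /* 16-byte xmm image */, unsigned length, unsigned index) {
    uint64_t mask = (1ULL << length) - 1;
    if (length + index > 64) {
        mask = 0xFFFF'FFFF'FFFF'FFFFULL; // clamp instead of asserting, as in the patch above
    }
    uint64_t low;
    std::memcpy(&low, dst, sizeof(low));
    low >>= index;
    low &= mask;
    // The patch also zeroes the upper qword, mirroring the AMD behaviour noted in the comments.
    std::memset(dst + sizeof(uint64_t), 0, sizeof(uint64_t));
    std::memcpy(dst, &low, sizeof(low));
}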
@@ -379,7 +379,7 @@ int ImeDialogUi::InputTextCallback(ImGuiInputTextCallbackData* data) {
         // the current language?)
         .user_id = ui->state->user_id,
         .resource_id = 0,
-        .timestamp = 0,
+        .timestamp = {0},
     };
 
     if (!ui->state->ConvertUTF8ToOrbis(event_char, 4, &src_keycode.character, 1)) {
@@ -133,6 +133,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector<std::string> ar
     LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
     LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
     LOG_INFO(Config, "GPU readbacks: {}", Config::readbacks());
+    LOG_INFO(Config, "GPU readbackLinearImages: {}", Config::readbackLinearImages());
     LOG_INFO(Config, "GPU directMemoryAccess: {}", Config::directMemoryAccess());
     LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
     LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
@@ -54,17 +54,23 @@ Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
     });
 }
 
+Id SharedAtomicU64IncDec(EmitContext& ctx, Id offset,
+                         Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
+    const Id shift_id{ctx.ConstU32(3U)};
+    const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+    const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
+    const Id pointer{ctx.EmitSharedMemoryAccess(ctx.shared_u64, ctx.shared_memory_u64, index)};
+    const auto [scope, semantics]{AtomicArgs(ctx)};
+    return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
+        return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics);
+    });
+}
+
 template <bool is_float = false>
 Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
                    Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
     const auto& buffer = ctx.buffers[handle];
-    const auto type = [&] {
-        if constexpr (is_float) {
-            return ctx.F32[1];
-        } else {
-            return ctx.U32[1];
-        }
-    }();
+    const Id type = is_float ? ctx.F32[1] : ctx.U32[1];
     if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
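As a quick reference for the new SharedAtomicU64IncDec helper above, the bounds check boils down to simple index arithmetic: a byte offset into shared memory is shifted down to a 64-bit element index, and that index is compared against ceil(shared_memory_size / 8). The sketch below models only that arithmetic with plain integers and a hypothetical function name; the real code emits the equivalent SPIR-V operations.

#include <cstdint>

// Illustrative arithmetic behind the 64-bit shared-memory bounds check (not the emitter itself).
static bool SharedU64IndexInBounds(uint32_t byte_offset, uint32_t shared_memory_size) {
    const uint32_t index = byte_offset >> 3;                     // 8 bytes per u64 element
    const uint32_t num_elements = (shared_memory_size + 7) / 8;  // Common::DivCeil(size, 8u)
    return index < num_elements;
}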
@@ -148,42 +154,82 @@ Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
 }
 
+Id EmitSharedAtomicUMax64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
+}
+
 Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
 }
 
+Id EmitSharedAtomicSMax64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
+}
+
 Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
 }
 
+Id EmitSharedAtomicUMin64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
+}
+
 Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
 }
 
+Id EmitSharedAtomicSMin64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
+}
+
 Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
 }
 
+Id EmitSharedAtomicAnd64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
+}
+
 Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr);
 }
 
+Id EmitSharedAtomicOr64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicOr);
+}
+
 Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor);
 }
 
+Id EmitSharedAtomicXor64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicXor);
+}
+
 Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value) {
     return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicISub);
 }
 
+Id EmitSharedAtomicISub64(EmitContext& ctx, Id offset, Id value) {
+    return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicISub);
+}
+
 Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset) {
     return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement);
 }
 
+Id EmitSharedAtomicInc64(EmitContext& ctx, Id offset) {
+    return SharedAtomicU64IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement);
+}
+
 Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset) {
     return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement);
 }
 
+Id EmitSharedAtomicDec64(EmitContext& ctx, Id offset) {
+    return SharedAtomicU64IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement);
+}
+
 Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
     return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
 }
@@ -139,15 +139,25 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicUMax64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicSMax64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicUMin64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicSMin64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicAnd64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicOr64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicXor64(EmitContext& ctx, Id offset, Id value);
 Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset);
+Id EmitSharedAtomicInc64(EmitContext& ctx, Id offset);
 Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset);
+Id EmitSharedAtomicDec64(EmitContext& ctx, Id offset);
 Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicISub64(EmitContext& ctx, Id offset, Id value);
 
 Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
 Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
@@ -519,7 +529,7 @@ Id EmitLaneId(EmitContext& ctx);
 Id EmitWarpId(EmitContext& ctx);
 Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
 Id EmitReadFirstLane(EmitContext& ctx, Id value);
-Id EmitReadLane(EmitContext& ctx, Id value, u32 lane);
+Id EmitReadLane(EmitContext& ctx, Id value, Id lane);
 Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
 Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding);
 Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding);
@@ -26,9 +26,8 @@ Id EmitReadFirstLane(EmitContext& ctx, Id value) {
     return ctx.OpGroupNonUniformBroadcastFirst(ctx.U32[1], SubgroupScope(ctx), value);
 }
 
-Id EmitReadLane(EmitContext& ctx, Id value, u32 lane) {
-    return ctx.OpGroupNonUniformBroadcast(ctx.U32[1], SubgroupScope(ctx), value,
-                                          ctx.ConstU32(lane));
+Id EmitReadLane(EmitContext& ctx, Id value, Id lane) {
+    return ctx.OpGroupNonUniformBroadcast(ctx.U32[1], SubgroupScope(ctx), value, lane);
 }
 
 Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) {
@@ -76,6 +76,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
     } else {
         SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
     }
+    String(fmt::format("{:#x}", info.pgm_hash));
 
     AddCapability(spv::Capability::Shader);
     DefineArithmeticTypes();
@@ -700,7 +701,7 @@ void EmitContext::DefineOutputs() {
 void EmitContext::DefinePushDataBlock() {
     // Create push constants block for instance steps rates
     const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4],
-                                         U32[4], U32[4], U32[4], U32[4], U32[4]),
+                                         U32[4], U32[4], U32[4], U32[4], U32[4], U32[2]),
                               "AuxData")};
     Decorate(struct_type, spv::Decoration::Block);
     MemberName(struct_type, PushData::Step0Index, "sr0");
@@ -715,6 +716,7 @@ void EmitContext::DefinePushDataBlock() {
     MemberName(struct_type, PushData::UdRegsIndex + 3, "ud_regs3");
     MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0");
     MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1");
+    MemberName(struct_type, PushData::BufOffsetIndex + 2, "buf_offsets2");
     MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U);
     MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U);
     MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U);
|
|||||||
MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U);
|
MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U);
|
||||||
MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U);
|
MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U);
|
||||||
MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U);
|
MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U);
|
||||||
|
MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 120U);
|
||||||
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
||||||
Name(push_data_block, "push_data");
|
Name(push_data_block, "push_data");
|
||||||
interfaces.push_back(push_data_block);
|
interfaces.push_back(push_data_block);
|
||||||
|
@@ -188,14 +188,15 @@ void CFG::SplitDivergenceScopes() {
             const bool is_close = is_close_scope(inst);
             if ((is_close || index == blk->end_index) && curr_begin != -1) {
                 // If there are no instructions inside scope don't do anything.
-                if (index - curr_begin == 1) {
+                if (index - curr_begin == 1 && is_close) {
                     curr_begin = -1;
                     continue;
                 }
                 // If all instructions in the scope ignore exec masking, we shouldn't insert a
                 // scope.
                 const auto start = inst_list.begin() + curr_begin + 1;
-                if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask)) {
+                if (!std::ranges::all_of(start, inst_list.begin() + index + !is_close,
+                                         IgnoresExecMask)) {
                     // Determine the first instruction affected by the exec mask.
                     do {
                         ++curr_begin;
@@ -397,7 +397,7 @@ constexpr std::array<InstFormat, 27> InstructionFormatSOPP = {{
     // 17 = S_SENDMSGHALT
     {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any},
     // 18 = S_TRAP
-    {InstClass::Undefined, InstCategory::Undefined, 0, 1, ScalarType::Any, ScalarType::Any},
+    {InstClass::Undefined, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any},
     // 19 = S_ICACHE_INV
     {InstClass::ScalarCache, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any},
     // 20 = S_INCPERFLEVEL
@@ -3,7 +3,6 @@
 
 #include "shader_recompiler/frontend/translate/translate.h"
 #include "shader_recompiler/ir/reg.h"
-#include "shader_recompiler/profile.h"
 #include "shader_recompiler/runtime_info.h"
 
 namespace Shader::Gcn {
@@ -12,29 +11,29 @@ void Translator::EmitDataShare(const GcnInst& inst) {
     switch (inst.opcode) {
         // DS
     case Opcode::DS_ADD_U32:
-        return DS_ADD_U32(inst, false);
+        return DS_OP(inst, AtomicOp::Add, false);
     case Opcode::DS_ADD_U64:
-        return DS_ADD_U64(inst, false);
+        return DS_OP<IR::U64>(inst, AtomicOp::Add, false);
     case Opcode::DS_SUB_U32:
-        return DS_SUB_U32(inst, false);
+        return DS_OP(inst, AtomicOp::Sub, false);
     case Opcode::DS_INC_U32:
-        return DS_INC_U32(inst, false);
+        return DS_OP(inst, AtomicOp::Inc, false);
     case Opcode::DS_DEC_U32:
-        return DS_DEC_U32(inst, false);
+        return DS_OP(inst, AtomicOp::Dec, false);
     case Opcode::DS_MIN_I32:
-        return DS_MIN_U32(inst, true, false);
+        return DS_OP(inst, AtomicOp::Smin, false);
     case Opcode::DS_MAX_I32:
-        return DS_MAX_U32(inst, true, false);
+        return DS_OP(inst, AtomicOp::Smax, false);
     case Opcode::DS_MIN_U32:
-        return DS_MIN_U32(inst, false, false);
+        return DS_OP(inst, AtomicOp::Umin, false);
     case Opcode::DS_MAX_U32:
-        return DS_MAX_U32(inst, false, false);
+        return DS_OP(inst, AtomicOp::Umax, false);
     case Opcode::DS_AND_B32:
-        return DS_AND_B32(inst, false);
+        return DS_OP(inst, AtomicOp::And, false);
     case Opcode::DS_OR_B32:
-        return DS_OR_B32(inst, false);
+        return DS_OP(inst, AtomicOp::Or, false);
     case Opcode::DS_XOR_B32:
-        return DS_XOR_B32(inst, false);
+        return DS_OP(inst, AtomicOp::Xor, false);
     case Opcode::DS_WRITE_B32:
         return DS_WRITE(32, false, false, false, inst);
     case Opcode::DS_WRITE2_B32:
@@ -42,19 +41,19 @@ void Translator::EmitDataShare(const GcnInst& inst) {
     case Opcode::DS_WRITE2ST64_B32:
         return DS_WRITE(32, false, true, true, inst);
     case Opcode::DS_ADD_RTN_U32:
-        return DS_ADD_U32(inst, true);
+        return DS_OP(inst, AtomicOp::Add, true);
     case Opcode::DS_SUB_RTN_U32:
-        return DS_SUB_U32(inst, true);
+        return DS_OP(inst, AtomicOp::Sub, true);
     case Opcode::DS_MIN_RTN_U32:
-        return DS_MIN_U32(inst, false, true);
+        return DS_OP(inst, AtomicOp::Umin, true);
     case Opcode::DS_MAX_RTN_U32:
-        return DS_MAX_U32(inst, false, true);
+        return DS_OP(inst, AtomicOp::Umax, true);
     case Opcode::DS_AND_RTN_B32:
-        return DS_AND_B32(inst, true);
+        return DS_OP(inst, AtomicOp::And, true);
     case Opcode::DS_OR_RTN_B32:
-        return DS_OR_B32(inst, true);
+        return DS_OP(inst, AtomicOp::Or, true);
     case Opcode::DS_XOR_RTN_B32:
-        return DS_XOR_B32(inst, true);
+        return DS_OP(inst, AtomicOp::Xor, true);
     case Opcode::DS_SWIZZLE_B32:
         return DS_SWIZZLE_B32(inst);
     case Opcode::DS_READ_B32:
@@ -117,92 +116,63 @@ void Translator::V_WRITELANE_B32(const GcnInst& inst) {
 
 // DS
 
-void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
+template <typename T>
+void Translator::DS_OP(const GcnInst& inst, AtomicOp op, bool rtn) {
+    const bool is_gds = inst.control.ds.gds;
     const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U32 data{GetSrc(inst.src[1])};
+    const T data = [&] {
+        if (op == AtomicOp::Inc || op == AtomicOp::Dec) {
+            return T{};
+        }
+        if constexpr (std::is_same_v<T, IR::U32>) {
+            return GetSrc(inst.src[1]);
+        } else {
+            return GetSrc64(inst.src[1]);
+        }
+    }();
     const IR::U32 offset =
         ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
     const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
+    const T original_val = [&] -> T {
+        switch (op) {
+        case AtomicOp::Add:
+            return ir.SharedAtomicIAdd(addr_offset, data, is_gds);
+        case AtomicOp::Umin:
+            return ir.SharedAtomicIMin(addr_offset, data, false, is_gds);
+        case AtomicOp::Smin:
+            return ir.SharedAtomicIMin(addr_offset, data, true, is_gds);
+        case AtomicOp::Umax:
+            return ir.SharedAtomicIMax(addr_offset, data, false, is_gds);
+        case AtomicOp::Smax:
+            return ir.SharedAtomicIMax(addr_offset, data, true, is_gds);
+        case AtomicOp::And:
+            return ir.SharedAtomicAnd(addr_offset, data, is_gds);
+        case AtomicOp::Or:
+            return ir.SharedAtomicOr(addr_offset, data, is_gds);
+        case AtomicOp::Xor:
+            return ir.SharedAtomicXor(addr_offset, data, is_gds);
+        case AtomicOp::Sub:
+            return ir.SharedAtomicISub(addr_offset, data, is_gds);
+        case AtomicOp::Inc:
+            return ir.SharedAtomicInc<T>(addr_offset, is_gds);
+        case AtomicOp::Dec:
+            return ir.SharedAtomicDec<T>(addr_offset, is_gds);
+        default:
+            UNREACHABLE();
+        }
+    }();
     if (rtn) {
-        SetDst(inst.dst[0], IR::U32{original_val});
-    }
-}
-
-void Translator::DS_ADD_U64(const GcnInst& inst, bool rtn) {
-    const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U64 data{GetSrc64(inst.src[1])};
-    const IR::U32 offset =
-        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
-    const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
-    if (rtn) {
-        SetDst64(inst.dst[0], IR::U64{original_val});
-    }
-}
-
-void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) {
-    const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U32 data{GetSrc(inst.src[1])};
-    const IR::U32 offset =
-        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
-    const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, is_signed);
-    if (rtn) {
-        SetDst(inst.dst[0], IR::U32{original_val});
-    }
-}
-
-void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) {
-    const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U32 data{GetSrc(inst.src[1])};
-    const IR::U32 offset =
-        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
-    const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, is_signed);
-    if (rtn) {
-        SetDst(inst.dst[0], IR::U32{original_val});
-    }
-}
-
-void Translator::DS_AND_B32(const GcnInst& inst, bool rtn) {
-    const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U32 data{GetSrc(inst.src[1])};
-    const IR::U32 offset =
-        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
-    const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicAnd(addr_offset, data);
-    if (rtn) {
-        SetDst(inst.dst[0], IR::U32{original_val});
-    }
-}
-
-void Translator::DS_OR_B32(const GcnInst& inst, bool rtn) {
-    const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U32 data{GetSrc(inst.src[1])};
-    const IR::U32 offset =
-        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
-    const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicOr(addr_offset, data);
-    if (rtn) {
-        SetDst(inst.dst[0], IR::U32{original_val});
-    }
-}
-
-void Translator::DS_XOR_B32(const GcnInst& inst, bool rtn) {
-    const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U32 data{GetSrc(inst.src[1])};
-    const IR::U32 offset =
-        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
-    const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicXor(addr_offset, data);
-    if (rtn) {
-        SetDst(inst.dst[0], IR::U32{original_val});
+        if constexpr (std::is_same_v<T, IR::U32>) {
+            SetDst(inst.dst[0], original_val);
+        } else {
+            SetDst64(inst.dst[0], original_val);
+        }
     }
 }
 
 void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64,
                           const GcnInst& inst) {
+    const bool is_gds = inst.control.ds.gds;
     const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
     const IR::VectorReg data0{inst.src[1].code};
     const IR::VectorReg data1{inst.src[2].code};
@@ -220,33 +190,85 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
             ir.WriteShared(64,
                            ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0),
                                                                  ir.GetVectorReg(data0 + 1))),
-                           addr0);
+                           addr0, is_gds);
         } else if (bit_size == 32) {
-            ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
+            ir.WriteShared(32, ir.GetVectorReg(data0), addr0, is_gds);
         } else if (bit_size == 16) {
-            ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data0)), addr0);
+            ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data0)), addr0, is_gds);
         }
         const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
         if (bit_size == 64) {
             ir.WriteShared(64,
                            ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1),
                                                                  ir.GetVectorReg(data1 + 1))),
-                           addr1);
+                           addr1, is_gds);
         } else if (bit_size == 32) {
-            ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
+            ir.WriteShared(32, ir.GetVectorReg(data1), addr1, is_gds);
         } else if (bit_size == 16) {
-            ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data1)), addr1);
+            ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data1)), addr1, is_gds);
         }
     } else {
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
         if (bit_size == 64) {
             const IR::Value data =
                 ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
-            ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0);
+            ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0, is_gds);
         } else if (bit_size == 32) {
-            ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
+            ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0, is_gds);
         } else if (bit_size == 16) {
-            ir.WriteShared(bit_size, ir.UConvert(16, ir.GetVectorReg(data0)), addr0);
+            ir.WriteShared(bit_size, ir.UConvert(16, ir.GetVectorReg(data0)), addr0, is_gds);
+        }
+    }
+}
+
+void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
+                         const GcnInst& inst) {
+    const bool is_gds = inst.control.ds.gds;
+    const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
+    IR::VectorReg dst_reg{inst.dst[0].code};
+    const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0;
+    if (info.stage == Stage::Fragment) {
+        ASSERT_MSG(!is_pair && bit_size == 32 && offset % 256 == 0,
+                   "Unexpected shared memory offset alignment: {}", offset);
+        ir.SetVectorReg(dst_reg, ir.GetVectorReg(GetScratchVgpr(offset)));
+        return;
+    }
+    if (is_pair) {
+        // Pair loads are either 32 or 64-bit
+        const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
+        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
+        const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0, is_gds);
+        if (bit_size == 64) {
+            const auto vector = ir.UnpackUint2x32(IR::U64{data0});
+            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
+            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
+        } else if (bit_size == 32) {
+            ir.SetVectorReg(dst_reg++, IR::U32{data0});
+        } else if (bit_size == 16) {
+            ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data0})});
+        }
+        const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
+        const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1, is_gds);
+        if (bit_size == 64) {
+            const auto vector = ir.UnpackUint2x32(IR::U64{data1});
+            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
+            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
+        } else if (bit_size == 32) {
+            ir.SetVectorReg(dst_reg++, IR::U32{data1});
+        } else if (bit_size == 16) {
+            ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data1})});
+        }
+    } else {
+        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
+        const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0, is_gds);
+        if (bit_size == 64) {
+            const auto vector = ir.UnpackUint2x32(IR::U64{data});
+            ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)});
+            ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(vector, 1)});
+        } else if (bit_size == 32) {
+            ir.SetVectorReg(dst_reg, IR::U32{data});
+        } else if (bit_size == 16) {
+            ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data})});
         }
     }
 }
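One detail the DS hunks above lean on repeatedly is how a DS instruction's immediate byte offset is assembled from its two 8-bit fields (offset1 forms the high byte, offset0 the low byte) and then added to the address VGPR. The sketch below models only that arithmetic with plain integers and hypothetical helper names; the real translator builds the same value through ir.Imm32 and ir.IAdd.

#include <cstdint>

// Minimal model of the DS immediate-offset computation used throughout the translator above.
static uint32_t DsImmediateOffset(uint8_t offset0, uint8_t offset1) {
    return (static_cast<uint32_t>(offset1) << 8u) + static_cast<uint32_t>(offset0);
}

// The effective LDS address is the base VGPR value plus that immediate,
// mirroring ir.IAdd(addr, ir.Imm32(offset)) in DS_OP / DS_READ / DS_WRITE.
static uint32_t DsEffectiveAddress(uint32_t base_vgpr, uint8_t offset0, uint8_t offset1) {
    return base_vgpr + DsImmediateOffset(offset0, offset1);
}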
@@ -263,91 +285,6 @@
     SetDst(inst.dst[0], ir.QuadShuffle(src, index));
 }
 
-void Translator::DS_INC_U32(const GcnInst& inst, bool rtn) {
-    const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U32 offset =
-        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
-    const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicInc(addr_offset);
-    if (rtn) {
-        SetDst(inst.dst[0], IR::U32{original_val});
-    }
-}
-
-void Translator::DS_DEC_U32(const GcnInst& inst, bool rtn) {
-    const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U32 offset =
-        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
-    const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicDec(addr_offset);
-    if (rtn) {
-        SetDst(inst.dst[0], IR::U32{original_val});
-    }
-}
-
-void Translator::DS_SUB_U32(const GcnInst& inst, bool rtn) {
-    const IR::U32 addr{GetSrc(inst.src[0])};
-    const IR::U32 data{GetSrc(inst.src[1])};
-    const IR::U32 offset =
-        ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
-    const IR::U32 addr_offset = ir.IAdd(addr, offset);
-    const IR::Value original_val = ir.SharedAtomicISub(addr_offset, data);
-    if (rtn) {
-        SetDst(inst.dst[0], IR::U32{original_val});
-    }
-}
-
-void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
-                         const GcnInst& inst) {
-    const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
-    IR::VectorReg dst_reg{inst.dst[0].code};
-    const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0;
-    if (info.stage == Stage::Fragment) {
-        ASSERT_MSG(!is_pair && bit_size == 32 && offset % 256 == 0,
-                   "Unexpected shared memory offset alignment: {}", offset);
-        ir.SetVectorReg(dst_reg, ir.GetVectorReg(GetScratchVgpr(offset)));
-        return;
-    }
-    if (is_pair) {
-        // Pair loads are either 32 or 64-bit
-        const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
-        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
-        const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0);
-        if (bit_size == 64) {
-            const auto vector = ir.UnpackUint2x32(IR::U64{data0});
-            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
-            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
-        } else if (bit_size == 32) {
-            ir.SetVectorReg(dst_reg++, IR::U32{data0});
-        } else if (bit_size == 16) {
-            ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data0})});
-        }
-        const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
-        const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
-        if (bit_size == 64) {
-            const auto vector = ir.UnpackUint2x32(IR::U64{data1});
-            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
-            ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
-        } else if (bit_size == 32) {
-            ir.SetVectorReg(dst_reg++, IR::U32{data1});
-        } else if (bit_size == 16) {
-            ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data1})});
-        }
-    } else {
-        const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
-        const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
-        if (bit_size == 64) {
-            const auto vector = ir.UnpackUint2x32(IR::U64{data});
-            ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)});
-            ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(vector, 1)});
-        } else if (bit_size == 32) {
-            ir.SetVectorReg(dst_reg, IR::U32{data});
-        } else if (bit_size == 16) {
-            ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data})});
-        }
-    }
-}
-
 void Translator::DS_APPEND(const GcnInst& inst) {
     const u32 inst_offset = (u32(inst.control.ds.offset1) << 8u) + inst.control.ds.offset0;
     const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
@@ -586,6 +586,15 @@ void Translator::S_MOV(const GcnInst& inst) {
 }
 
 void Translator::S_MOV_B64(const GcnInst& inst) {
+    // Moving SGPR to SGPR is used for thread masks, like most operations, but it can also be used
+    // for moving sharps.
+    if (inst.dst[0].field == OperandField::ScalarGPR &&
+        inst.src[0].field == OperandField::ScalarGPR) {
+        ir.SetScalarReg(IR::ScalarReg(inst.dst[0].code),
+                        ir.GetScalarReg(IR::ScalarReg(inst.src[0].code)));
+        ir.SetScalarReg(IR::ScalarReg(inst.dst[0].code + 1),
+                        ir.GetScalarReg(IR::ScalarReg(inst.src[0].code + 1)));
+    }
     const IR::U1 src = [&] {
         switch (inst.src[0].field) {
         case OperandField::VccLo:
@@ -16,6 +16,9 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
     case Opcode::S_SETPRIO:
         LOG_WARNING(Render_Vulkan, "S_SETPRIO instruction!");
         return;
+    case Opcode::S_TRAP:
+        LOG_WARNING(Render_Vulkan, "S_TRAP instruction!");
+        return;
     case Opcode::S_GETPC_B64:
         return S_GETPC_B64(pc, inst);
     case Opcode::S_SETPC_B64:
@@ -270,21 +270,13 @@ public:
 
     // Data share
     // DS
-    void DS_ADD_U32(const GcnInst& inst, bool rtn);
-    void DS_ADD_U64(const GcnInst& inst, bool rtn);
-    void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn);
-    void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn);
+    template <typename T = IR::U32>
+    void DS_OP(const GcnInst& inst, AtomicOp op, bool rtn);
     void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
-    void DS_SWIZZLE_B32(const GcnInst& inst);
-    void DS_AND_B32(const GcnInst& inst, bool rtn);
-    void DS_OR_B32(const GcnInst& inst, bool rtn);
-    void DS_XOR_B32(const GcnInst& inst, bool rtn);
     void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
+    void DS_SWIZZLE_B32(const GcnInst& inst);
     void DS_APPEND(const GcnInst& inst);
     void DS_CONSUME(const GcnInst& inst);
-    void DS_SUB_U32(const GcnInst& inst, bool rtn);
-    void DS_INC_U32(const GcnInst& inst, bool rtn);
-    void DS_DEC_U32(const GcnInst& inst, bool rtn);
 
     // Buffer Memory
     // MUBUF / MTBUF
@@ -565,7 +565,8 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
     }
     // v_mbcnt_hi_u32_b32 vX, exec_hi, 0/vZ
     if ((inst.src[0].field == OperandField::ExecHi ||
-         inst.src[0].field == OperandField::VccHi) &&
+         inst.src[0].field == OperandField::VccHi ||
+         inst.src[0].field == OperandField::ScalarGPR) &&
        (inst.src[1].field == OperandField::ConstZero ||
         inst.src[1].field == OperandField::VectorGPR)) {
         return SetDst(inst.dst[0], GetSrc(inst.src[1]));
@@ -579,7 +580,8 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
     }
     // v_mbcnt_lo_u32_b32 vY, exec_lo, vX
     // used combined with above for append buffer indexing.
-    if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo) {
+    if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo ||
+        inst.src[0].field == OperandField::ScalarGPR) {
         return SetDst(inst.dst[0], GetSrc(inst.src[1]));
     }
     UNREACHABLE();
@@ -192,9 +192,10 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_
     const IR::VectorReg vaddr{inst.src[0].code};
     const IR::ScalarReg sharp{inst.src[2].code * 4};
     const IR::Value soffset{GetSrc(inst.src[3])};
+    const bool has_soffset = !soffset.IsImmediate() || soffset.U32() != 0;
     if (info.stage != Stage::Geometry) {
-        ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
-                   "Non immediate offset not supported");
+        ASSERT_MSG(!has_soffset || !mubuf.offen,
+                   "Having both scalar and vector offsets is not supported");
     }
 
     const IR::Value address = [&] -> IR::Value {
@@ -204,15 +205,21 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_
         if (mubuf.idxen && mubuf.offen) {
             return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
         }
+        if (mubuf.idxen && has_soffset) {
+            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
+        }
         if (mubuf.idxen || mubuf.offen) {
             return ir.GetVectorReg(vaddr);
         }
+        if (has_soffset) {
+            return soffset;
+        }
         return {};
     }();
 
     IR::BufferInstInfo buffer_info{};
     buffer_info.index_enable.Assign(mubuf.idxen);
-    buffer_info.offset_enable.Assign(mubuf.offen);
+    buffer_info.offset_enable.Assign(mubuf.offen || has_soffset);
     buffer_info.inst_offset.Assign(mubuf.offset);
     buffer_info.globally_coherent.Assign(mubuf.glc);
     buffer_info.system_coherent.Assign(mubuf.slc);
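As a quick reference for the address-selection lambda above, the sketch below enumerates which operands end up in the buffer address for each combination of idxen, offen, and a non-zero scalar offset. It is a stand-alone model with plain integers and invented names (BufferAddress, SelectBufferAddress), not the emulator's IR values.

#include <optional>

// Illustrative model of the buffer address selection in BUFFER_LOAD above.
struct BufferAddress {
    std::optional<int> index;   // from the first address VGPR when idxen is set
    std::optional<int> offset;  // from the second VGPR (offen) or the scalar offset register
};

static BufferAddress SelectBufferAddress(bool idxen, bool offen, bool has_soffset,
                                         int vaddr0, int vaddr1, int soffset) {
    if (idxen && offen) {
        return {vaddr0, vaddr1};
    }
    if (idxen && has_soffset) {
        return {vaddr0, soffset};
    }
    if (idxen || offen) {
        // A single VGPR carries whichever component is enabled.
        return {idxen ? std::optional<int>(vaddr0) : std::nullopt,
                offen ? std::optional<int>(vaddr0) : std::nullopt};
    }
    if (has_soffset) {
        return {std::nullopt, soffset};
    }
    return {};
}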
@@ -25,7 +25,7 @@ namespace Shader {
 
 static constexpr size_t NumUserDataRegs = 16;
 static constexpr size_t NumImages = 64;
-static constexpr size_t NumBuffers = 32;
+static constexpr size_t NumBuffers = 40;
 static constexpr size_t NumSamplers = 16;
 static constexpr size_t NumFMasks = 8;
 
@ -291,78 +291,137 @@ void IREmitter::SetPatch(Patch patch, const F32& value) {
|
|||||||
Inst(Opcode::SetPatch, patch, value);
|
Inst(Opcode::SetPatch, patch, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset, bool is_gds) {
|
||||||
switch (bit_size) {
|
switch (bit_size) {
|
||||||
case 16:
|
case 16:
|
||||||
return Inst<U16>(Opcode::LoadSharedU16, offset);
|
return Inst<U16>(Opcode::LoadSharedU16, Flags{is_gds}, offset);
|
||||||
case 32:
|
case 32:
|
||||||
return Inst<U32>(Opcode::LoadSharedU32, offset);
|
return Inst<U32>(Opcode::LoadSharedU32, Flags{is_gds}, offset);
|
||||||
case 64:
|
case 64:
|
||||||
return Inst<U64>(Opcode::LoadSharedU64, offset);
|
return Inst<U64>(Opcode::LoadSharedU64, Flags{is_gds}, offset);
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
|
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
|
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset, bool is_gds) {
|
||||||
switch (bit_size) {
|
switch (bit_size) {
|
||||||
case 16:
|
case 16:
|
||||||
Inst(Opcode::WriteSharedU16, offset, value);
|
Inst(Opcode::WriteSharedU16, Flags{is_gds}, offset, value);
|
||||||
break;
|
break;
|
||||||
case 32:
|
case 32:
|
||||||
Inst(Opcode::WriteSharedU32, offset, value);
|
Inst(Opcode::WriteSharedU32, Flags{is_gds}, offset, value);
|
||||||
break;
|
break;
|
||||||
case 64:
|
case 64:
|
||||||
Inst(Opcode::WriteSharedU64, offset, value);
|
Inst(Opcode::WriteSharedU64, Flags{is_gds}, offset, value);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
|
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data) {
|
U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data, bool is_gds) {
|
||||||
switch (data.Type()) {
|
switch (data.Type()) {
|
||||||
case Type::U32:
|
case Type::U32:
|
||||||
return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
|
return Inst<U32>(Opcode::SharedAtomicIAdd32, Flags{is_gds}, address, data);
|
||||||
case Type::U64:
|
case Type::U64:
|
||||||
return Inst<U64>(Opcode::SharedAtomicIAdd64, address, data);
|
return Inst<U64>(Opcode::SharedAtomicIAdd64, Flags{is_gds}, address, data);
|
||||||
default:
|
default:
|
||||||
ThrowInvalidType(data.Type());
|
ThrowInvalidType(data.Type());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) {
|
U32U64 IREmitter::SharedAtomicIMin(const U32& address, const U32U64& data, bool is_signed,
|
||||||
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMin32, address, data)
|
bool is_gds) {
|
||||||
: Inst<U32>(Opcode::SharedAtomicUMin32, address, data);
|
switch (data.Type()) {
|
||||||
|
case Type::U32:
|
||||||
|
return Inst<U32>(is_signed ? Opcode::SharedAtomicSMin32 : Opcode::SharedAtomicUMin32,
|
||||||
|
Flags{is_gds}, address, data);
|
||||||
|
case Type::U64:
|
||||||
|
return Inst<U64>(is_signed ? Opcode::SharedAtomicSMin64 : Opcode::SharedAtomicUMin64,
|
||||||
|
Flags{is_gds}, address, data);
|
||||||
|
default:
|
||||||
|
ThrowInvalidType(data.Type());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) {
|
U32U64 IREmitter::SharedAtomicIMax(const U32& address, const U32U64& data, bool is_signed,
|
||||||
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMax32, address, data)
|
bool is_gds) {
|
||||||
: Inst<U32>(Opcode::SharedAtomicUMax32, address, data);
|
switch (data.Type()) {
|
||||||
|
case Type::U32:
|
||||||
|
return Inst<U32>(is_signed ? Opcode::SharedAtomicSMax32 : Opcode::SharedAtomicUMax32,
|
||||||
|
Flags{is_gds}, address, data);
|
||||||
|
case Type::U64:
|
||||||
|
return Inst<U64>(is_signed ? Opcode::SharedAtomicSMax64 : Opcode::SharedAtomicUMax64,
|
||||||
|
Flags{is_gds}, address, data);
|
||||||
|
default:
|
||||||
|
ThrowInvalidType(data.Type());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::SharedAtomicAnd(const U32& address, const U32& data) {
|
U32U64 IREmitter::SharedAtomicAnd(const U32& address, const U32U64& data, bool is_gds) {
|
||||||
return Inst<U32>(Opcode::SharedAtomicAnd32, address, data);
|
switch (data.Type()) {
|
||||||
|
case Type::U32:
|
||||||
|
return Inst<U32>(Opcode::SharedAtomicAnd32, Flags{is_gds}, address, data);
|
||||||
|
case Type::U64:
|
||||||
|
return Inst<U64>(Opcode::SharedAtomicAnd64, Flags{is_gds}, address, data);
|
||||||
|
default:
|
||||||
|
ThrowInvalidType(data.Type());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::SharedAtomicOr(const U32& address, const U32& data) {
|
U32U64 IREmitter::SharedAtomicOr(const U32& address, const U32U64& data, bool is_gds) {
|
||||||
|
switch (data.Type()) {
|
||||||
|
case Type::U32:
|
||||||
|
return Inst<U32>(Opcode::SharedAtomicOr32, Flags{is_gds}, address, data);
|
||||||
|
case Type::U64:
|
||||||
|
return Inst<U64>(Opcode::SharedAtomicOr64, Flags{is_gds}, address, data);
|
||||||
|
default:
|
||||||
|
ThrowInvalidType(data.Type());
|
||||||
|
}
|
||||||
return Inst<U32>(Opcode::SharedAtomicOr32, address, data);
|
return Inst<U32>(Opcode::SharedAtomicOr32, address, data);
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) {
|
U32U64 IREmitter::SharedAtomicXor(const U32& address, const U32U64& data, bool is_gds) {
|
||||||
return Inst<U32>(Opcode::SharedAtomicXor32, address, data);
|
switch (data.Type()) {
|
||||||
|
case Type::U32:
|
||||||
|
return Inst<U32>(Opcode::SharedAtomicXor32, Flags{is_gds}, address, data);
|
||||||
|
case Type::U64:
|
||||||
|
return Inst<U64>(Opcode::SharedAtomicXor64, Flags{is_gds}, address, data);
|
||||||
|
default:
|
||||||
|
ThrowInvalidType(data.Type());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::SharedAtomicInc(const U32& address) {
|
U32U64 IREmitter::SharedAtomicISub(const U32& address, const U32U64& data, bool is_gds) {
|
||||||
return Inst<U32>(Opcode::SharedAtomicInc32, address);
|
switch (data.Type()) {
|
||||||
|
case Type::U32:
|
||||||
|
return Inst<U32>(Opcode::SharedAtomicISub32, Flags{is_gds}, address, data);
|
||||||
|
case Type::U64:
|
||||||
|
return Inst<U64>(Opcode::SharedAtomicISub64, Flags{is_gds}, address, data);
|
||||||
|
default:
|
||||||
|
ThrowInvalidType(data.Type());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::SharedAtomicDec(const U32& address) {
|
template <>
|
||||||
return Inst<U32>(Opcode::SharedAtomicDec32, address);
|
U32 IREmitter::SharedAtomicInc(const U32& address, bool is_gds) {
|
||||||
|
return Inst<U32>(Opcode::SharedAtomicInc32, Flags{is_gds}, address);
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::SharedAtomicISub(const U32& address, const U32& data) {
|
template <>
|
||||||
return Inst<U32>(Opcode::SharedAtomicISub32, address, data);
|
U64 IREmitter::SharedAtomicInc(const U32& address, bool is_gds) {
|
||||||
|
return Inst<U64>(Opcode::SharedAtomicInc64, Flags{is_gds}, address);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
U32 IREmitter::SharedAtomicDec(const U32& address, bool is_gds) {
|
||||||
|
return Inst<U32>(Opcode::SharedAtomicDec32, Flags{is_gds}, address);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
U64 IREmitter::SharedAtomicDec(const U32& address, bool is_gds) {
|
||||||
|
return Inst<U64>(Opcode::SharedAtomicDec64, Flags{is_gds}, address);
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
|
U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
|
||||||
|
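The emitters above now carry an `is_gds` flag through the instruction's flags word instead of introducing separate GDS opcodes. A minimal usage sketch based only on the signatures shown in this hunk; the surrounding translator state (`ir`, `addr`, `value`) is assumed and not part of this diff:

    // LDS traffic keeps is_gds = false.
    const IR::Value lds_word = ir.LoadShared(32, false, addr, false);
    // GDS traffic passes is_gds = true so later passes can redirect it to the GDS buffer.
    ir.WriteShared(32, value, addr, true);
    const IR::U32U64 previous = ir.SharedAtomicIAdd(addr, value, true);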
@ -96,18 +96,24 @@ public:
|
|||||||
[[nodiscard]] F32 GetPatch(Patch patch);
|
[[nodiscard]] F32 GetPatch(Patch patch);
|
||||||
void SetPatch(Patch patch, const F32& value);
|
void SetPatch(Patch patch, const F32& value);
|
||||||
|
|
||||||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset,
|
||||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
bool is_gds = false);
|
||||||
|
void WriteShared(int bit_size, const Value& value, const U32& offset, bool is_gds = false);
|
||||||
|
|
||||||
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data);
|
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data, bool is_gds);
|
||||||
[[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data);
|
[[nodiscard]] U32U64 SharedAtomicISub(const U32& address, const U32U64& data, bool is_gds);
|
||||||
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
|
[[nodiscard]] U32U64 SharedAtomicIMin(const U32& address, const U32U64& data, bool is_signed,
|
||||||
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
|
bool is_gds);
|
||||||
[[nodiscard]] U32 SharedAtomicInc(const U32& address);
|
[[nodiscard]] U32U64 SharedAtomicIMax(const U32& address, const U32U64& data, bool is_signed,
|
||||||
[[nodiscard]] U32 SharedAtomicDec(const U32& address);
|
bool is_gds);
|
||||||
[[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
|
[[nodiscard]] U32U64 SharedAtomicAnd(const U32& address, const U32U64& data, bool is_gds);
|
||||||
[[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data);
|
[[nodiscard]] U32U64 SharedAtomicOr(const U32& address, const U32U64& data, bool is_gds);
|
||||||
[[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data);
|
[[nodiscard]] U32U64 SharedAtomicXor(const U32& address, const U32U64& data, bool is_gds);
|
||||||
|
|
||||||
|
template <typename T = U32>
|
||||||
|
[[nodiscard]] T SharedAtomicInc(const U32& address, bool is_gds);
|
||||||
|
template <typename T = U32>
|
||||||
|
[[nodiscard]] T SharedAtomicDec(const U32& address, bool is_gds);
|
||||||
|
|
||||||
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
|
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
|
||||||
[[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);
|
[[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);
|
||||||
|
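SharedAtomicInc/SharedAtomicDec are declared above as templates defaulting to U32, so the caller selects the result width by naming the type. A small illustrative call site, assuming an emitter `ir` and a U32 `address` (not part of this diff):

    const IR::U32 prev32 = ir.SharedAtomicInc(address, false);           // LDS, 32-bit
    const IR::U64 prev64 = ir.SharedAtomicDec<IR::U64>(address, true);   // GDS, 64-bit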
@ -92,7 +92,6 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||||||
case Opcode::WriteSharedU32:
|
case Opcode::WriteSharedU32:
|
||||||
case Opcode::WriteSharedU64:
|
case Opcode::WriteSharedU64:
|
||||||
case Opcode::SharedAtomicIAdd32:
|
case Opcode::SharedAtomicIAdd32:
|
||||||
case Opcode::SharedAtomicIAdd64:
|
|
||||||
case Opcode::SharedAtomicISub32:
|
case Opcode::SharedAtomicISub32:
|
||||||
case Opcode::SharedAtomicSMin32:
|
case Opcode::SharedAtomicSMin32:
|
||||||
case Opcode::SharedAtomicUMin32:
|
case Opcode::SharedAtomicUMin32:
|
||||||
@ -103,6 +102,17 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
|||||||
case Opcode::SharedAtomicAnd32:
|
case Opcode::SharedAtomicAnd32:
|
||||||
case Opcode::SharedAtomicOr32:
|
case Opcode::SharedAtomicOr32:
|
||||||
case Opcode::SharedAtomicXor32:
|
case Opcode::SharedAtomicXor32:
|
||||||
|
case Opcode::SharedAtomicIAdd64:
|
||||||
|
case Opcode::SharedAtomicISub64:
|
||||||
|
case Opcode::SharedAtomicSMin64:
|
||||||
|
case Opcode::SharedAtomicUMin64:
|
||||||
|
case Opcode::SharedAtomicSMax64:
|
||||||
|
case Opcode::SharedAtomicUMax64:
|
||||||
|
case Opcode::SharedAtomicInc64:
|
||||||
|
case Opcode::SharedAtomicDec64:
|
||||||
|
case Opcode::SharedAtomicAnd64:
|
||||||
|
case Opcode::SharedAtomicOr64:
|
||||||
|
case Opcode::SharedAtomicXor64:
|
||||||
case Opcode::ImageWrite:
|
case Opcode::ImageWrite:
|
||||||
case Opcode::ImageAtomicIAdd32:
|
case Opcode::ImageAtomicIAdd32:
|
||||||
case Opcode::ImageAtomicSMin32:
|
case Opcode::ImageAtomicSMin32:
|
||||||
|
@ -41,15 +41,25 @@ OPCODE(WriteSharedU64, Void, U32,
|
|||||||
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
|
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
|
||||||
OPCODE(SharedAtomicIAdd64, U64, U32, U64, )
|
OPCODE(SharedAtomicIAdd64, U64, U32, U64, )
|
||||||
OPCODE(SharedAtomicISub32, U32, U32, U32, )
|
OPCODE(SharedAtomicISub32, U32, U32, U32, )
|
||||||
|
OPCODE(SharedAtomicISub64, U64, U32, U64, )
|
||||||
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
|
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
|
||||||
|
OPCODE(SharedAtomicSMin64, U64, U32, U64, )
|
||||||
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
|
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
|
||||||
|
OPCODE(SharedAtomicUMin64, U64, U32, U64, )
|
||||||
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
|
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
|
||||||
|
OPCODE(SharedAtomicSMax64, U64, U32, U64, )
|
||||||
OPCODE(SharedAtomicUMax32, U32, U32, U32, )
|
OPCODE(SharedAtomicUMax32, U32, U32, U32, )
|
||||||
|
OPCODE(SharedAtomicUMax64, U64, U32, U64, )
|
||||||
OPCODE(SharedAtomicInc32, U32, U32, )
|
OPCODE(SharedAtomicInc32, U32, U32, )
|
||||||
|
OPCODE(SharedAtomicInc64, U64, U32, )
|
||||||
OPCODE(SharedAtomicDec32, U32, U32, )
|
OPCODE(SharedAtomicDec32, U32, U32, )
|
||||||
|
OPCODE(SharedAtomicDec64, U64, U32, )
|
||||||
OPCODE(SharedAtomicAnd32, U32, U32, U32, )
|
OPCODE(SharedAtomicAnd32, U32, U32, U32, )
|
||||||
|
OPCODE(SharedAtomicAnd64, U64, U32, U64, )
|
||||||
OPCODE(SharedAtomicOr32, U32, U32, U32, )
|
OPCODE(SharedAtomicOr32, U32, U32, U32, )
|
||||||
|
OPCODE(SharedAtomicOr64, U64, U32, U64, )
|
||||||
OPCODE(SharedAtomicXor32, U32, U32, U32, )
|
OPCODE(SharedAtomicXor32, U32, U32, U32, )
|
||||||
|
OPCODE(SharedAtomicXor64, U64, U32, U64, )
|
||||||
|
|
||||||
// Context getters/setters
|
// Context getters/setters
|
||||||
OPCODE(GetUserData, U32, ScalarReg, )
|
OPCODE(GetUserData, U32, ScalarReg, )
|
||||||
|
@ -95,6 +95,20 @@ void ReadLaneEliminationPass(IR::Program& program) {
|
|||||||
if (inst.GetOpcode() != IR::Opcode::ReadLane) {
|
if (inst.GetOpcode() != IR::Opcode::ReadLane) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check for the following pattern and replace it with ReadFirstLane
|
||||||
|
// s_ff1_i32_b64 sgpr, exec
|
||||||
|
// v_readlane_b32 sdst, vgpr, sgpr
|
||||||
|
if (const auto lane = inst.Arg(1); !lane.IsImmediate()) {
|
||||||
|
if (lane.InstRecursive()->GetOpcode() == IR::Opcode::FindILsb64) {
|
||||||
|
const auto value = inst.Arg(0);
|
||||||
|
inst.ReplaceOpcode(IR::Opcode::ReadFirstLane);
|
||||||
|
inst.ClearArgs();
|
||||||
|
inst.SetArg(0, value);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const u32 lane = inst.Arg(1).U32();
|
const u32 lane = inst.Arg(1).U32();
|
||||||
IR::Inst* prod = inst.Arg(0).InstRecursive();
|
IR::Inst* prod = inst.Arg(0).InstRecursive();
|
||||||
|
|
||||||
|
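For context on the pattern matched above: `s_ff1_i32_b64 sgpr, exec` yields the index of the lowest set bit of EXEC, i.e. the first active lane, so a `v_readlane_b32` with that index reads the same value ReadFirstLane would. A tiny standalone illustration of that equivalence (the mask is an arbitrary example):

    #include <bit>
    #include <cstdint>
    // EXEC = 0b10100: lanes 2 and 4 are active; find-first-one returns 2,
    // so readlane(vgpr, 2) picks the first active lane, matching ReadFirstLane.
    constexpr std::uint64_t exec_mask = 0b10100;
    static_assert(std::countr_zero(exec_mask) == 2);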
@ -84,8 +84,42 @@ bool IsBufferInstruction(const IR::Inst& inst) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool IsDataRingInstruction(const IR::Inst& inst) {
|
bool IsDataRingInstruction(const IR::Inst& inst) {
|
||||||
return inst.GetOpcode() == IR::Opcode::DataAppend ||
|
switch (inst.GetOpcode()) {
|
||||||
inst.GetOpcode() == IR::Opcode::DataConsume;
|
case IR::Opcode::DataAppend:
|
||||||
|
case IR::Opcode::DataConsume:
|
||||||
|
return true;
|
||||||
|
case IR::Opcode::LoadSharedU16:
|
||||||
|
case IR::Opcode::LoadSharedU32:
|
||||||
|
case IR::Opcode::LoadSharedU64:
|
||||||
|
case IR::Opcode::WriteSharedU16:
|
||||||
|
case IR::Opcode::WriteSharedU32:
|
||||||
|
case IR::Opcode::WriteSharedU64:
|
||||||
|
case IR::Opcode::SharedAtomicIAdd32:
|
||||||
|
case IR::Opcode::SharedAtomicIAdd64:
|
||||||
|
case IR::Opcode::SharedAtomicUMin32:
|
||||||
|
case IR::Opcode::SharedAtomicUMin64:
|
||||||
|
case IR::Opcode::SharedAtomicSMin32:
|
||||||
|
case IR::Opcode::SharedAtomicSMin64:
|
||||||
|
case IR::Opcode::SharedAtomicUMax32:
|
||||||
|
case IR::Opcode::SharedAtomicUMax64:
|
||||||
|
case IR::Opcode::SharedAtomicSMax32:
|
||||||
|
case IR::Opcode::SharedAtomicSMax64:
|
||||||
|
case IR::Opcode::SharedAtomicAnd32:
|
||||||
|
case IR::Opcode::SharedAtomicAnd64:
|
||||||
|
case IR::Opcode::SharedAtomicOr32:
|
||||||
|
case IR::Opcode::SharedAtomicOr64:
|
||||||
|
case IR::Opcode::SharedAtomicXor32:
|
||||||
|
case IR::Opcode::SharedAtomicXor64:
|
||||||
|
case IR::Opcode::SharedAtomicISub32:
|
||||||
|
case IR::Opcode::SharedAtomicISub64:
|
||||||
|
case IR::Opcode::SharedAtomicInc32:
|
||||||
|
case IR::Opcode::SharedAtomicInc64:
|
||||||
|
case IR::Opcode::SharedAtomicDec32:
|
||||||
|
case IR::Opcode::SharedAtomicDec64:
|
||||||
|
return inst.Flags<bool>(); // is_gds
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
|
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
|
||||||
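The shared-memory cases above return `inst.Flags<bool>()`, which reads back the same `is_gds` bit the emitter packed with `Flags{is_gds}`; only GDS-flagged shared ops are therefore treated as data-share accesses. A minimal sketch of the round trip, reusing calls from this diff:

    ir.SharedAtomicIAdd(address, data, /*is_gds=*/true);  // emission: flag stored on the inst
    const bool is_gds = inst.Flags<bool>();               // later pass: same bit read back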
@ -507,7 +541,8 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info,
|
||||||
|
Descriptors& descriptors) {
|
||||||
const u32 binding = descriptors.Add(BufferResource{
|
const u32 binding = descriptors.Add(BufferResource{
|
||||||
.used_types = IR::Type::U32,
|
.used_types = IR::Type::U32,
|
||||||
.inline_cbuf = AmdGpu::Buffer::Null(),
|
.inline_cbuf = AmdGpu::Buffer::Null(),
|
||||||
@ -515,37 +550,111 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
|
|||||||
.is_written = true,
|
.is_written = true,
|
||||||
});
|
});
|
||||||
|
|
||||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
|
||||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
|
||||||
return inst;
|
|
||||||
}
|
|
||||||
return std::nullopt;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Attempt to deduce the GDS address of counter at compile time.
|
|
||||||
u32 gds_addr = 0;
|
|
||||||
const IR::Value& gds_offset = inst.Arg(0);
|
|
||||||
if (gds_offset.IsImmediate()) {
|
|
||||||
// Nothing to do, offset is known.
|
|
||||||
gds_addr = gds_offset.U32() & 0xFFFF;
|
|
||||||
} else {
|
|
||||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
|
||||||
ASSERT_MSG(result, "Unable to track M0 source");
|
|
||||||
|
|
||||||
// M0 must be set by some user data register.
|
|
||||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
|
||||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
|
||||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
|
||||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
|
||||||
m0_val += prod->Arg(1).U32();
|
|
||||||
}
|
|
||||||
gds_addr = m0_val & 0xFFFF;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Patch instruction.
|
|
||||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
|
||||||
inst.SetArg(1, ir.Imm32(binding));
|
// For data append/consume operations attempt to deduce the GDS address.
|
||||||
|
if (inst.GetOpcode() == IR::Opcode::DataAppend || inst.GetOpcode() == IR::Opcode::DataConsume) {
|
||||||
|
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||||
|
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||||
|
return inst;
|
||||||
|
}
|
||||||
|
return std::nullopt;
|
||||||
|
};
|
||||||
|
|
||||||
|
u32 gds_addr = 0;
|
||||||
|
const IR::Value& gds_offset = inst.Arg(0);
|
||||||
|
if (gds_offset.IsImmediate()) {
|
||||||
|
// Nothing to do, offset is known.
|
||||||
|
gds_addr = gds_offset.U32() & 0xFFFF;
|
||||||
|
} else {
|
||||||
|
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||||
|
ASSERT_MSG(result, "Unable to track M0 source");
|
||||||
|
|
||||||
|
// M0 must be set by some user data register.
|
||||||
|
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||||
|
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||||
|
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||||
|
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||||
|
m0_val += prod->Arg(1).U32();
|
||||||
|
}
|
||||||
|
gds_addr = m0_val & 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Patch instruction.
|
||||||
|
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||||
|
inst.SetArg(1, ir.Imm32(binding));
|
||||||
|
} else {
|
||||||
|
// Convert the shared memory opcode into a storage buffer access targeting the GDS buffer.
|
||||||
|
const IR::U32 offset = IR::U32{inst.Arg(0)};
|
||||||
|
const IR::U32 address_words = ir.ShiftRightLogical(offset, ir.Imm32(1));
|
||||||
|
const IR::U32 address_dwords = ir.ShiftRightLogical(offset, ir.Imm32(2));
|
||||||
|
const IR::U32 address_qwords = ir.ShiftRightLogical(offset, ir.Imm32(3));
|
||||||
|
const IR::U32 handle = ir.Imm32(binding);
|
||||||
|
switch (inst.GetOpcode()) {
|
||||||
|
case IR::Opcode::SharedAtomicIAdd32:
|
||||||
|
inst.ReplaceUsesWith(ir.BufferAtomicIAdd(handle, address_dwords, inst.Arg(1), {}));
|
||||||
|
break;
|
||||||
|
case IR::Opcode::SharedAtomicIAdd64:
|
||||||
|
inst.ReplaceUsesWith(
|
||||||
|
ir.BufferAtomicIAdd(handle, address_qwords, IR::U64{inst.Arg(1)}, {}));
|
||||||
|
break;
|
||||||
|
case IR::Opcode::SharedAtomicISub32:
|
||||||
|
inst.ReplaceUsesWith(ir.BufferAtomicISub(handle, address_dwords, inst.Arg(1), {}));
|
||||||
|
break;
|
||||||
|
case IR::Opcode::SharedAtomicSMin32:
|
||||||
|
case IR::Opcode::SharedAtomicUMin32: {
|
||||||
|
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32;
|
||||||
|
inst.ReplaceUsesWith(
|
||||||
|
ir.BufferAtomicIMin(handle, address_dwords, inst.Arg(1), is_signed, {}));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case IR::Opcode::SharedAtomicSMax32:
|
||||||
|
case IR::Opcode::SharedAtomicUMax32: {
|
||||||
|
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
|
||||||
|
inst.ReplaceUsesWith(
|
||||||
|
ir.BufferAtomicIMax(handle, address_dwords, inst.Arg(1), is_signed, {}));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case IR::Opcode::SharedAtomicInc32:
|
||||||
|
inst.ReplaceUsesWith(ir.BufferAtomicInc(handle, address_dwords, {}));
|
||||||
|
break;
|
||||||
|
case IR::Opcode::SharedAtomicDec32:
|
||||||
|
inst.ReplaceUsesWith(ir.BufferAtomicDec(handle, address_dwords, {}));
|
||||||
|
break;
|
||||||
|
case IR::Opcode::SharedAtomicAnd32:
|
||||||
|
inst.ReplaceUsesWith(ir.BufferAtomicAnd(handle, address_dwords, inst.Arg(1), {}));
|
||||||
|
break;
|
||||||
|
case IR::Opcode::SharedAtomicOr32:
|
||||||
|
inst.ReplaceUsesWith(ir.BufferAtomicOr(handle, address_dwords, inst.Arg(1), {}));
|
||||||
|
break;
|
||||||
|
case IR::Opcode::SharedAtomicXor32:
|
||||||
|
inst.ReplaceUsesWith(ir.BufferAtomicXor(handle, address_dwords, inst.Arg(1), {}));
|
||||||
|
break;
|
||||||
|
case IR::Opcode::LoadSharedU16:
|
||||||
|
inst.ReplaceUsesWith(ir.LoadBufferU16(handle, address_words, {}));
|
||||||
|
break;
|
||||||
|
case IR::Opcode::LoadSharedU32:
|
||||||
|
inst.ReplaceUsesWith(ir.LoadBufferU32(1, handle, address_dwords, {}));
|
||||||
|
break;
|
||||||
|
case IR::Opcode::LoadSharedU64:
|
||||||
|
inst.ReplaceUsesWith(ir.LoadBufferU64(handle, address_qwords, {}));
|
||||||
|
break;
|
||||||
|
case IR::Opcode::WriteSharedU16:
|
||||||
|
ir.StoreBufferU16(handle, address_words, IR::U16{inst.Arg(1)}, {});
|
||||||
|
inst.Invalidate();
|
||||||
|
break;
|
||||||
|
case IR::Opcode::WriteSharedU32:
|
||||||
|
ir.StoreBufferU32(1, handle, address_dwords, inst.Arg(1), {});
|
||||||
|
inst.Invalidate();
|
||||||
|
break;
|
||||||
|
case IR::Opcode::WriteSharedU64:
|
||||||
|
ir.StoreBufferU64(handle, address_qwords, IR::U64{inst.Arg(1)}, {});
|
||||||
|
inst.Invalidate();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
|
IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
|
||||||
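The three shifted offsets above (`address_words`, `address_dwords`, `address_qwords`) convert the byte offset carried by the shared-memory op into an element index sized to the access: >>1 for 16-bit, >>2 for 32-bit, >>3 for 64-bit accesses. A quick worked check of that arithmetic for a byte offset of 24:

    // byte offset 24 -> word index 12, dword index 6, qword index 3
    static_assert((24u >> 1) == 12u && (24u >> 2) == 6u && (24u >> 3) == 3u);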
@ -916,8 +1025,6 @@ void ResourceTrackingPass(IR::Program& program) {
|
|||||||
PatchBufferSharp(*block, inst, info, descriptors);
|
PatchBufferSharp(*block, inst, info, descriptors);
|
||||||
} else if (IsImageInstruction(inst)) {
|
} else if (IsImageInstruction(inst)) {
|
||||||
PatchImageSharp(*block, inst, info, descriptors);
|
PatchImageSharp(*block, inst, info, descriptors);
|
||||||
} else if (IsDataRingInstruction(inst)) {
|
|
||||||
PatchDataRingAccess(*block, inst, info, descriptors);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -929,6 +1036,8 @@ void ResourceTrackingPass(IR::Program& program) {
|
|||||||
PatchBufferArgs(*block, inst, info);
|
PatchBufferArgs(*block, inst, info);
|
||||||
} else if (IsImageInstruction(inst)) {
|
} else if (IsImageInstruction(inst)) {
|
||||||
PatchImageArgs(*block, inst, info);
|
PatchImageArgs(*block, inst, info);
|
||||||
|
} else if (IsDataRingInstruction(inst)) {
|
||||||
|
PatchGlobalDataShareAccess(*block, inst, info, descriptors);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -55,6 +55,16 @@ void Visit(Info& info, const IR::Inst& inst) {
|
|||||||
info.shared_types |= IR::Type::U32;
|
info.shared_types |= IR::Type::U32;
|
||||||
break;
|
break;
|
||||||
case IR::Opcode::SharedAtomicIAdd64:
|
case IR::Opcode::SharedAtomicIAdd64:
|
||||||
|
case IR::Opcode::SharedAtomicISub64:
|
||||||
|
case IR::Opcode::SharedAtomicSMin64:
|
||||||
|
case IR::Opcode::SharedAtomicUMin64:
|
||||||
|
case IR::Opcode::SharedAtomicSMax64:
|
||||||
|
case IR::Opcode::SharedAtomicUMax64:
|
||||||
|
case IR::Opcode::SharedAtomicInc64:
|
||||||
|
case IR::Opcode::SharedAtomicDec64:
|
||||||
|
case IR::Opcode::SharedAtomicAnd64:
|
||||||
|
case IR::Opcode::SharedAtomicOr64:
|
||||||
|
case IR::Opcode::SharedAtomicXor64:
|
||||||
info.uses_shared_int64_atomics = true;
|
info.uses_shared_int64_atomics = true;
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
case IR::Opcode::LoadSharedU64:
|
case IR::Opcode::LoadSharedU64:
|
||||||
|
@ -15,6 +15,16 @@ static bool Requires16BitSharedAtomic(const IR::Inst& inst) {
|
|||||||
static bool Requires64BitSharedAtomic(const IR::Inst& inst) {
|
static bool Requires64BitSharedAtomic(const IR::Inst& inst) {
|
||||||
switch (inst.GetOpcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::SharedAtomicIAdd64:
|
case IR::Opcode::SharedAtomicIAdd64:
|
||||||
|
case IR::Opcode::SharedAtomicISub64:
|
||||||
|
case IR::Opcode::SharedAtomicSMin64:
|
||||||
|
case IR::Opcode::SharedAtomicUMin64:
|
||||||
|
case IR::Opcode::SharedAtomicSMax64:
|
||||||
|
case IR::Opcode::SharedAtomicUMax64:
|
||||||
|
case IR::Opcode::SharedAtomicInc64:
|
||||||
|
case IR::Opcode::SharedAtomicDec64:
|
||||||
|
case IR::Opcode::SharedAtomicAnd64:
|
||||||
|
case IR::Opcode::SharedAtomicOr64:
|
||||||
|
case IR::Opcode::SharedAtomicXor64:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
|
@ -17,7 +17,6 @@ static bool IsSharedAccess(const IR::Inst& inst) {
|
|||||||
case IR::Opcode::WriteSharedU32:
|
case IR::Opcode::WriteSharedU32:
|
||||||
case IR::Opcode::WriteSharedU64:
|
case IR::Opcode::WriteSharedU64:
|
||||||
case IR::Opcode::SharedAtomicIAdd32:
|
case IR::Opcode::SharedAtomicIAdd32:
|
||||||
case IR::Opcode::SharedAtomicIAdd64:
|
|
||||||
case IR::Opcode::SharedAtomicISub32:
|
case IR::Opcode::SharedAtomicISub32:
|
||||||
case IR::Opcode::SharedAtomicSMin32:
|
case IR::Opcode::SharedAtomicSMin32:
|
||||||
case IR::Opcode::SharedAtomicUMin32:
|
case IR::Opcode::SharedAtomicUMin32:
|
||||||
@ -28,6 +27,17 @@ static bool IsSharedAccess(const IR::Inst& inst) {
|
|||||||
case IR::Opcode::SharedAtomicAnd32:
|
case IR::Opcode::SharedAtomicAnd32:
|
||||||
case IR::Opcode::SharedAtomicOr32:
|
case IR::Opcode::SharedAtomicOr32:
|
||||||
case IR::Opcode::SharedAtomicXor32:
|
case IR::Opcode::SharedAtomicXor32:
|
||||||
|
case IR::Opcode::SharedAtomicIAdd64:
|
||||||
|
case IR::Opcode::SharedAtomicISub64:
|
||||||
|
case IR::Opcode::SharedAtomicSMin64:
|
||||||
|
case IR::Opcode::SharedAtomicUMin64:
|
||||||
|
case IR::Opcode::SharedAtomicSMax64:
|
||||||
|
case IR::Opcode::SharedAtomicUMax64:
|
||||||
|
case IR::Opcode::SharedAtomicInc64:
|
||||||
|
case IR::Opcode::SharedAtomicDec64:
|
||||||
|
case IR::Opcode::SharedAtomicAnd64:
|
||||||
|
case IR::Opcode::SharedAtomicOr64:
|
||||||
|
case IR::Opcode::SharedAtomicXor64:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
@ -64,6 +74,16 @@ IR::Type CalculateSharedMemoryTypes(IR::Program& program) {
|
|||||||
case IR::Opcode::LoadSharedU64:
|
case IR::Opcode::LoadSharedU64:
|
||||||
case IR::Opcode::WriteSharedU64:
|
case IR::Opcode::WriteSharedU64:
|
||||||
case IR::Opcode::SharedAtomicIAdd64:
|
case IR::Opcode::SharedAtomicIAdd64:
|
||||||
|
case IR::Opcode::SharedAtomicISub64:
|
||||||
|
case IR::Opcode::SharedAtomicSMin64:
|
||||||
|
case IR::Opcode::SharedAtomicUMin64:
|
||||||
|
case IR::Opcode::SharedAtomicSMax64:
|
||||||
|
case IR::Opcode::SharedAtomicUMax64:
|
||||||
|
case IR::Opcode::SharedAtomicInc64:
|
||||||
|
case IR::Opcode::SharedAtomicDec64:
|
||||||
|
case IR::Opcode::SharedAtomicAnd64:
|
||||||
|
case IR::Opcode::SharedAtomicOr64:
|
||||||
|
case IR::Opcode::SharedAtomicXor64:
|
||||||
used_types |= IR::Type::U64;
|
used_types |= IR::Type::U64;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -119,19 +139,26 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
|||||||
ir.BufferAtomicIAdd(handle, address, inst.Arg(1), {}));
|
ir.BufferAtomicIAdd(handle, address, inst.Arg(1), {}));
|
||||||
continue;
|
continue;
|
||||||
case IR::Opcode::SharedAtomicISub32:
|
case IR::Opcode::SharedAtomicISub32:
|
||||||
|
case IR::Opcode::SharedAtomicISub64:
|
||||||
inst.ReplaceUsesWithAndRemove(
|
inst.ReplaceUsesWithAndRemove(
|
||||||
ir.BufferAtomicISub(handle, address, inst.Arg(1), {}));
|
ir.BufferAtomicISub(handle, address, inst.Arg(1), {}));
|
||||||
continue;
|
continue;
|
||||||
case IR::Opcode::SharedAtomicSMin32:
|
case IR::Opcode::SharedAtomicSMin32:
|
||||||
case IR::Opcode::SharedAtomicUMin32: {
|
case IR::Opcode::SharedAtomicUMin32:
|
||||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32;
|
case IR::Opcode::SharedAtomicSMin64:
|
||||||
|
case IR::Opcode::SharedAtomicUMin64: {
|
||||||
|
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32 ||
|
||||||
|
inst.GetOpcode() == IR::Opcode::SharedAtomicSMin64;
|
||||||
inst.ReplaceUsesWithAndRemove(
|
inst.ReplaceUsesWithAndRemove(
|
||||||
ir.BufferAtomicIMin(handle, address, inst.Arg(1), is_signed, {}));
|
ir.BufferAtomicIMin(handle, address, inst.Arg(1), is_signed, {}));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
case IR::Opcode::SharedAtomicSMax32:
|
case IR::Opcode::SharedAtomicSMax32:
|
||||||
case IR::Opcode::SharedAtomicUMax32: {
|
case IR::Opcode::SharedAtomicUMax32:
|
||||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
|
case IR::Opcode::SharedAtomicSMax64:
|
||||||
|
case IR::Opcode::SharedAtomicUMax64: {
|
||||||
|
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32 ||
|
||||||
|
inst.GetOpcode() == IR::Opcode::SharedAtomicSMax64;
|
||||||
inst.ReplaceUsesWithAndRemove(
|
inst.ReplaceUsesWithAndRemove(
|
||||||
ir.BufferAtomicIMax(handle, address, inst.Arg(1), is_signed, {}));
|
ir.BufferAtomicIMax(handle, address, inst.Arg(1), is_signed, {}));
|
||||||
continue;
|
continue;
|
||||||
@ -143,12 +170,15 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
|||||||
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicDec(handle, address, {}));
|
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicDec(handle, address, {}));
|
||||||
continue;
|
continue;
|
||||||
case IR::Opcode::SharedAtomicAnd32:
|
case IR::Opcode::SharedAtomicAnd32:
|
||||||
|
case IR::Opcode::SharedAtomicAnd64:
|
||||||
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicAnd(handle, address, inst.Arg(1), {}));
|
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicAnd(handle, address, inst.Arg(1), {}));
|
||||||
continue;
|
continue;
|
||||||
case IR::Opcode::SharedAtomicOr32:
|
case IR::Opcode::SharedAtomicOr32:
|
||||||
|
case IR::Opcode::SharedAtomicOr64:
|
||||||
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicOr(handle, address, inst.Arg(1), {}));
|
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicOr(handle, address, inst.Arg(1), {}));
|
||||||
continue;
|
continue;
|
||||||
case IR::Opcode::SharedAtomicXor32:
|
case IR::Opcode::SharedAtomicXor32:
|
||||||
|
case IR::Opcode::SharedAtomicXor64:
|
||||||
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicXor(handle, address, inst.Arg(1), {}));
|
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicXor(handle, address, inst.Arg(1), {}));
|
||||||
continue;
|
continue;
|
||||||
case IR::Opcode::LoadSharedU16:
|
case IR::Opcode::LoadSharedU16:
|
||||||
@ -173,7 +203,7 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
|||||||
inst.Invalidate();
|
inst.Invalidate();
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -603,6 +603,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
// TODO: handle proper synchronization, for now signal that update is done
|
// TODO: handle proper synchronization, for now signal that update is done
|
||||||
// immediately
|
// immediately
|
||||||
regs.cp_strmout_cntl.offset_update_done = 1;
|
regs.cp_strmout_cntl.offset_update_done = 1;
|
||||||
|
} else if (event->event_index.Value() == EventIndex::ZpassDone) {
|
||||||
|
LOG_WARNING(Render, "Unimplemented occlusion query");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -88,7 +88,7 @@ struct Liverpool {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x1000) {
|
static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x2000) {
|
||||||
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
||||||
|
|
||||||
if (code[0] == token_mov_vcchi) {
|
if (code[0] == token_mov_vcchi) {
|
||||||
|
@ -48,6 +48,8 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
|||||||
|
|
||||||
memory_tracker = std::make_unique<MemoryTracker>(tracker);
|
memory_tracker = std::make_unique<MemoryTracker>(tracker);
|
||||||
|
|
||||||
|
std::memset(gds_buffer.mapped_data.data(), 0, DataShareBufferSize);
|
||||||
|
|
||||||
// Ensure the first slot is used for the null buffer
|
// Ensure the first slot is used for the null buffer
|
||||||
const auto null_id =
|
const auto null_id =
|
||||||
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, 16);
|
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, 16);
|
||||||
@ -312,7 +314,10 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
|
|||||||
void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
|
void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
|
||||||
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
|
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
|
||||||
if (!is_gds) {
|
if (!is_gds) {
|
||||||
ASSERT(memory->TryWriteBacking(std::bit_cast<void*>(address), value, num_bytes));
|
if (!memory->TryWriteBacking(std::bit_cast<void*>(address), value, num_bytes)) {
|
||||||
|
std::memcpy(std::bit_cast<void*>(address), value, num_bytes);
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (!IsRegionRegistered(address, num_bytes)) {
|
if (!IsRegionRegistered(address, num_bytes)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|