Merge branch 'shadps4-emu:main' into filesystem

This commit is contained in:
Daniel R 2024-07-10 13:16:34 +02:00 committed by GitHub
commit ec48aa8cd6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
34 changed files with 590 additions and 130 deletions

View File

@ -5,6 +5,35 @@ SPDX-License-Identifier: GPL-2.0-or-later
## Build shadPS4 for Linux
### Install the necessary tools to build shadPS4:
#### Debian & Ubuntu
```
sudo apt-get install build-essential libasound2-dev libpulse-dev libopenal-dev zlib1g-dev libedit-dev libvulkan-dev libudev-dev git libevdev-dev libsdl2-2.0 libsdl2-dev libjack-dev libsndio-dev
```
#### Fedora
```
sudo dnf install alsa-lib-devel cmake libatomic libevdev-devel libudev-devel openal-devel qt6-qtbase-devel qt6-qtbase-private-devel vulkan-devel pipewire-jack-audio-connection-kit-devel qt6-qtmultimedia-devel qt6-qtsvg-devel
```
#### Arch Linux
```
sudo pacman -S openal cmake vulkan-validation-layers qt6-base qt6-declarative qt6-multimedia sdl2 sndio jack2 base-devel
```
#### OpenSUSE
```
sudo zypper install git cmake libasound2 libpulse-devel openal-soft-devel zlib-devel libedit-devel vulkan-devel libudev-devel libqt6-qtbase-devel libqt6-qtmultimedia-devel libqt6-qtsvg-devel libQt6Gui-private-headers-devel libevdev-devel libsndio7_1 libjack-devel
```
### Cloning and compiling:
Clone the repository recursively:
```
git clone --recursive https://github.com/shadps4-emu/shadPS4.git
cd shadPS4
```
Generate the build directory in the shadPS4 directory:
```
cmake -S . -B build/
@ -17,5 +46,11 @@ cd build/
Use make to build the project:
```
make -j$(nproc)
cmake --build . --parallel $(nproc)
```
Now run the emulator:
```
./shadps4 /"PATH"/"TO"/"GAME"/"FOLDER"/eboot.bin
```

View File

@ -11,8 +11,8 @@
namespace Audio {
int SDLAudio::AudioOutOpen(int type, u32 samples_num, u32 freq,
Libraries::AudioOut::OrbisAudioOutParam format) {
using Libraries::AudioOut::OrbisAudioOutParam;
Libraries::AudioOut::OrbisAudioOutParamFormat format) {
using Libraries::AudioOut::OrbisAudioOutParamFormat;
std::scoped_lock lock{m_mutex};
for (int id = 0; id < portsOut.size(); id++) {
auto& port = portsOut[id];
@ -24,42 +24,42 @@ int SDLAudio::AudioOutOpen(int type, u32 samples_num, u32 freq,
port.format = format;
SDL_AudioFormat sampleFormat;
switch (format) {
case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_MONO:
case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_MONO:
sampleFormat = SDL_AUDIO_S16;
port.channels_num = 1;
port.sample_size = 2;
break;
case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_MONO:
case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_MONO:
sampleFormat = SDL_AUDIO_F32;
port.channels_num = 1;
port.sample_size = 4;
break;
case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_STEREO:
case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_STEREO:
sampleFormat = SDL_AUDIO_S16;
port.channels_num = 2;
port.sample_size = 2;
break;
case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_STEREO:
case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_STEREO:
sampleFormat = SDL_AUDIO_F32;
port.channels_num = 2;
port.sample_size = 4;
break;
case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH:
case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH:
sampleFormat = SDL_AUDIO_S16;
port.channels_num = 8;
port.sample_size = 2;
break;
case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH:
case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH:
sampleFormat = SDL_AUDIO_F32;
port.channels_num = 8;
port.sample_size = 4;
break;
case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH_STD:
case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH_STD:
sampleFormat = SDL_AUDIO_S16;
port.channels_num = 8;
port.sample_size = 2;
break;
case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH_STD:
case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH_STD:
sampleFormat = SDL_AUDIO_F32;
port.channels_num = 8;
port.sample_size = 4;
@ -108,7 +108,7 @@ s32 SDLAudio::AudioOutOutput(s32 handle, const void* ptr) {
}
bool SDLAudio::AudioOutSetVolume(s32 handle, s32 bitflag, s32* volume) {
using Libraries::AudioOut::OrbisAudioOutParam;
using Libraries::AudioOut::OrbisAudioOutParamFormat;
std::scoped_lock lock{m_mutex};
auto& port = portsOut[handle - 1];
if (!port.isOpen) {
@ -119,8 +119,9 @@ bool SDLAudio::AudioOutSetVolume(s32 handle, s32 bitflag, s32* volume) {
if (bit == 1) {
int src_index = i;
if (port.format == OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH_STD ||
port.format == OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH_STD) {
if (port.format ==
OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH_STD ||
port.format == OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH_STD) {
switch (i) {
case 4:
src_index = 6;

View File

@ -15,7 +15,7 @@ public:
virtual ~SDLAudio() = default;
int AudioOutOpen(int type, u32 samples_num, u32 freq,
Libraries::AudioOut::OrbisAudioOutParam format);
Libraries::AudioOut::OrbisAudioOutParamFormat format);
s32 AudioOutOutput(s32 handle, const void* ptr);
bool AudioOutSetVolume(s32 handle, s32 bitflag, s32* volume);
bool AudioOutGetStatus(s32 handle, int* type, int* channels_num);

View File

@ -18,7 +18,7 @@ static std::string logFilter;
static std::string logType = "sync";
static bool isDebugDump = false;
static bool isLibc = true;
static bool isShowSplash = true;
static bool isShowSplash = false;
static bool isNullGpu = false;
static bool shouldDumpShaders = false;
static bool shouldDumpPM4 = false;

View File

@ -33,7 +33,7 @@ static std::string_view GetAudioOutPort(u32 port) {
}
}
static std::string_view GetAudioOutParam(u32 param) {
static std::string_view GetAudioOutParamFormat(OrbisAudioOutParamFormat param) {
switch (param) {
case ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_MONO:
return "S16_MONO";
@ -56,6 +56,19 @@ static std::string_view GetAudioOutParam(u32 param) {
}
}
/// Translates an audio-out parameter attribute into a short label used in log output.
/// Values that have no dedicated label are reported as "INVALID".
static std::string_view GetAudioOutParamAttr(OrbisAudioOutParamAttr attr) {
    if (attr == ORBIS_AUDIO_OUT_PARAM_ATTR_NONE) {
        return "NONE";
    }
    if (attr == ORBIS_AUDIO_OUT_PARAM_ATTR_RESTRICTED) {
        return "RESTRICTED";
    }
    if (attr == ORBIS_AUDIO_OUT_PARAM_ATTR_MIX_TO_MAIN) {
        return "MIX_TO_MAIN";
    }
    return "INVALID";
}
int PS4_SYSV_ABI sceAudioOutDeviceIdOpen() {
LOG_ERROR(Lib_AudioOut, "(STUBBED) called");
return ORBIS_OK;
@ -259,12 +272,14 @@ int PS4_SYSV_ABI sceAudioOutMbusInit() {
s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
OrbisAudioOutPort port_type, s32 index, u32 length,
u32 sample_rate, OrbisAudioOutParam param_type) {
u32 sample_rate,
OrbisAudioOutParamExtendedInformation param_type) {
LOG_INFO(Lib_AudioOut,
"AudioOutOpen id = {} port_type = {} index = {} lenght= {} sample_rate = {} "
"param_type = {}",
"param_type = {} attr = {}",
user_id, GetAudioOutPort(port_type), index, length, sample_rate,
GetAudioOutParam(param_type));
GetAudioOutParamFormat(param_type.data_format),
GetAudioOutParamAttr(param_type.attributes));
if ((port_type < 0 || port_type > 4) && (port_type != 127)) {
LOG_ERROR(Lib_AudioOut, "Invalid port type");
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT_TYPE;
@ -273,10 +288,6 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
LOG_ERROR(Lib_AudioOut, "Invalid sample rate");
return ORBIS_AUDIO_OUT_ERROR_INVALID_SAMPLE_FREQ;
}
if (param_type < 0 || param_type > 7) {
LOG_ERROR(Lib_AudioOut, "Invalid format");
return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT;
}
if (length != 256 && length != 512 && length != 768 && length != 1024 && length != 1280 &&
length != 1536 && length != 1792 && length != 2048) {
LOG_ERROR(Lib_AudioOut, "Invalid length");
@ -285,7 +296,18 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
if (index != 0) {
LOG_ERROR(Lib_AudioOut, "index is not valid !=0 {}", index);
}
int result = audio->AudioOutOpen(port_type, length, sample_rate, param_type);
OrbisAudioOutParamFormat format = param_type.data_format;
if (format < 0 || format > 7) {
LOG_ERROR(Lib_AudioOut, "Invalid format");
return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT;
}
OrbisAudioOutParamAttr attr = param_type.attributes;
if (attr < 0 || attr > 2) {
// TODO Handle attributes in output audio device
LOG_ERROR(Lib_AudioOut, "Invalid format attribute");
return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT;
}
int result = audio->AudioOutOpen(port_type, length, sample_rate, format);
if (result == -1) {
LOG_ERROR(Lib_AudioOut, "Audio ports are full");
return ORBIS_AUDIO_OUT_ERROR_PORT_FULL;

View File

@ -3,6 +3,8 @@
#pragma once
#include "common/bit_field.h"
#include "core/libraries/system/userservice.h"
namespace Libraries::AudioOut {
@ -18,7 +20,7 @@ enum OrbisAudioOutPort {
ORBIS_AUDIO_OUT_PORT_TYPE_AUX = 127
};
enum OrbisAudioOutParam {
enum OrbisAudioOutParamFormat {
ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_MONO = 0,
ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_STEREO = 1,
ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH = 2,
@ -29,6 +31,22 @@ enum OrbisAudioOutParam {
ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH_STD = 7
};
// Attribute values that can accompany the sample format in an audio-out open parameter
// (stored in the `attributes` field of OrbisAudioOutParamExtendedInformation).
enum OrbisAudioOutParamAttr {
ORBIS_AUDIO_OUT_PARAM_ATTR_NONE = 0,
ORBIS_AUDIO_OUT_PARAM_ATTR_RESTRICTED = 1,
ORBIS_AUDIO_OUT_PARAM_ATTR_MIX_TO_MAIN = 2,
};
// Packed parameter word passed as the last argument of sceAudioOutOpen. All members live in
// one anonymous union, so they share the same storage; each BitField exposes a slice of it.
// NOTE(review): presumably the BitField template arguments are <first bit, bit count, value
// type> - confirm against common/bit_field.h.
struct OrbisAudioOutParamExtendedInformation {
union {
// Sample format / channel layout selector.
BitField<0, 8, OrbisAudioOutParamFormat> data_format;
BitField<8, 8, u32> reserve0;
// Attribute value, see OrbisAudioOutParamAttr.
BitField<16, 4, OrbisAudioOutParamAttr> attributes;
// NOTE(review): if the arguments are <position, bits>, this field ends at bit 29 and bit 30
// is not covered by any member - confirm that the gap is intended.
BitField<20, 10, u32> reserve1;
BitField<31, 1, u32> unused;
};
};
struct OrbisAudioOutOutputParam {
s32 handle;
const void* ptr;
@ -80,7 +98,7 @@ int PS4_SYSV_ABI sceAudioOutMasteringTerm();
int PS4_SYSV_ABI sceAudioOutMbusInit();
s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
OrbisAudioOutPort port_type, s32 index, u32 length,
u32 sample_rate, OrbisAudioOutParam param_type);
u32 sample_rate, OrbisAudioOutParamExtendedInformation param_type);
int PS4_SYSV_ABI sceAudioOutOpenEx();
s32 PS4_SYSV_ABI sceAudioOutOutput(s32 handle, const void* ptr);
s32 PS4_SYSV_ABI sceAudioOutOutputs(OrbisAudioOutOutputParam* param, u32 num);

View File

@ -3,6 +3,7 @@
#include "common/assert.h"
#include "common/config.h"
#include "common/debug.h"
#include "common/logging/log.h"
#include "common/path_util.h"
#include "common/slot_vector.h"
@ -264,6 +265,7 @@ static_assert(CtxInitSequence400.size() == 0x61);
// In case if `submitDone` is issued we need to block submissions until GPU idle
static u32 submission_lock{};
std::condition_variable cv_lock{};
static std::mutex m_submission{};
static u64 frames_submitted{}; // frame counter
static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame
@ -277,6 +279,18 @@ struct AscQueueInfo {
static Common::SlotVector<AscQueueInfo> asc_queues{};
static constexpr VAddr tessellation_factors_ring_addr = 0xFF0000000ULL;
/// Interrupt callback (registered for InterruptId::GpuIdle) that clears the submission lock
/// and wakes every thread currently blocked on `cv_lock`. The interrupt id is not used.
static void ResetSubmissionLock(Platform::InterruptId irq) {
    std::lock_guard<std::mutex> guard{m_submission};
    submission_lock = 0;
    // Notify while still holding the mutex, exactly as waiters expect the flag to be cleared.
    cv_lock.notify_all();
}
/// Blocks the calling thread until `submission_lock` has been reset to zero.
static void WaitGpuIdle() {
    HLE_TRACE;
    std::unique_lock guard{m_submission};
    // Re-check after every wake-up so spurious wake-ups do not end the wait early.
    while (submission_lock != 0) {
        cv_lock.wait(guard);
    }
}
static void DumpCommandList(std::span<const u32> cmd_list, const std::string& postfix) {
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::PM4Dir);
@ -465,14 +479,9 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) {
return;
}
std::unique_lock lock{m_submission};
if (submission_lock != 0) {
liverpool->WaitGpuIdle();
WaitGpuIdle();
// Suspend logic goes here
submission_lock = 0;
}
/* Suspend logic goes here */
auto vqid = gnm_vqid - 1;
auto& asc_queue = asc_queues[{vqid}];
@ -863,9 +872,9 @@ int PS4_SYSV_ABI sceGnmEndWorkload() {
return ORBIS_OK;
}
int PS4_SYSV_ABI sceGnmFindResourcesPublic() {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
return ORBIS_OK;
s32 PS4_SYSV_ABI sceGnmFindResourcesPublic() {
LOG_TRACE(Lib_GnmDriver, "called");
return ORBIS_GNM_ERROR_FAILURE; // not available in retail FW
}
void PS4_SYSV_ABI sceGnmFlushGarlic() {
@ -1321,7 +1330,7 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(u32* cmdbuf, u32 size, u32 shader_id,
if (shader_id > 1) {
LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id);
return 0x8eee00ff;
return ORBIS_GNM_ERROR_FAILURE;
}
// clang-format off
@ -1391,7 +1400,7 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id,
if (shader_id != 0) {
LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id);
return 0x8eee00ff;
return ORBIS_GNM_ERROR_FAILURE;
}
// A fullscreen triangle with one uv set
@ -1930,13 +1939,9 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[
}
}
if (submission_lock != 0) {
liverpool->WaitGpuIdle();
WaitGpuIdle();
// Suspend logic goes here
submission_lock = 0;
}
/* Suspend logic goes here */
if (send_init_packet) {
if (sdk_version <= 0x1ffffffu) {
@ -1990,7 +1995,6 @@ int PS4_SYSV_ABI sceGnmSubmitDone() {
if (!liverpool->IsGpuIdle()) {
submission_lock = true;
}
liverpool->NotifySubmitDone();
send_init_packet = true;
++frames_submitted;
return ORBIS_OK;
@ -2471,6 +2475,9 @@ void RegisterlibSceGnmDriver(Core::Loader::SymbolsResolver* sym) {
sdk_version = 0;
}
Platform::IrqC::Instance()->Register(Platform::InterruptId::GpuIdle, ResetSubmissionLock,
nullptr);
LIB_FUNCTION("b0xyllnVY-I", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAddEqEvent);
LIB_FUNCTION("b08AgtPlHPg", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1,
sceGnmAreSubmitsAllowed);

View File

@ -75,7 +75,7 @@ int PS4_SYSV_ABI sceGnmDriverInternalVirtualQuery();
int PS4_SYSV_ABI sceGnmDriverTraceInProgress();
int PS4_SYSV_ABI sceGnmDriverTriggerCapture();
int PS4_SYSV_ABI sceGnmEndWorkload();
int PS4_SYSV_ABI sceGnmFindResourcesPublic();
s32 PS4_SYSV_ABI sceGnmFindResourcesPublic();
void PS4_SYSV_ABI sceGnmFlushGarlic();
int PS4_SYSV_ABI sceGnmGetCoredumpAddress();
int PS4_SYSV_ABI sceGnmGetCoredumpMode();

View File

@ -243,6 +243,7 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags,
if (vma.type == VMAType::Direct) {
const auto dmem_it = FindDmemArea(vma.phys_base);
ASSERT(dmem_it != dmem_map.end());
info->offset = vma.phys_base;
info->memory_type = dmem_it->second.memory_type;
}

View File

@ -26,6 +26,7 @@ enum class InterruptId : u32 {
Compute6RelMem = 6u,
GfxEop = 7u,
GfxFlip = 8u,
GpuIdle = 9u,
};
using IrqHandler = std::function<void(InterruptId)>;

View File

@ -3,15 +3,31 @@
#pragma once
#include <QCoreApplication>
#include <QDesktopServices>
#include <QFile>
#include <QHeaderView>
#include <QImage>
#include <QMenu>
#include <QMessageBox>
#include <QPixmap>
#include <QStandardPaths>
#include <QTableWidget>
#include <QTextStream>
#include <QTreeWidget>
#include <QTreeWidgetItem>
#include "game_info.h"
#include "trophy_viewer.h"
#ifdef Q_OS_WIN
#include <ShlObj.h>
#include <Windows.h>
#include <objbase.h>
#include <shlguid.h>
#include <shobjidl.h>
#endif
class GuiContextMenus : public QObject {
Q_OBJECT
public:
@ -27,13 +43,16 @@ public:
// Setup menu.
QMenu menu(widget);
QAction createShortcut("Create Shortcut", widget);
QAction openFolder("Open Game Folder", widget);
QAction openSfoViewer("SFO Viewer", widget);
QAction openTrophyViewer("Trophy Viewer", widget);
menu.addAction(&createShortcut);
menu.addAction(&openFolder);
menu.addAction(&openSfoViewer);
menu.addAction(&openTrophyViewer);
// Show menu.
auto selected = menu.exec(global_pos);
if (!selected) {
@ -105,6 +124,73 @@ public:
connect(widget->parent(), &QWidget::destroyed, trophyViewer,
[widget, trophyViewer]() { trophyViewer->deleteLater(); });
}
if (selected == &createShortcut) {
QString targetPath = QString::fromStdString(m_games[itemID].path);
QString ebootPath = targetPath + "/eboot.bin";
// Get the full path to the icon
QString iconPath = QString::fromStdString(m_games[itemID].icon_path);
QFileInfo iconFileInfo(iconPath);
QString icoPath = iconFileInfo.absolutePath() + "/" + iconFileInfo.baseName() + ".ico";
// Path to shortcut/link
QString linkPath;
// Path to the shadps4.exe executable
QString exePath;
#ifdef Q_OS_WIN
linkPath = QStandardPaths::writableLocation(QStandardPaths::DesktopLocation) + "/" +
QString::fromStdString(m_games[itemID].name)
.remove(QRegularExpression("[\\\\/:*?\"<>|]")) +
".lnk";
exePath = QCoreApplication::applicationFilePath().replace("\\", "/");
#else
linkPath = QStandardPaths::writableLocation(QStandardPaths::DesktopLocation) + "/" +
QString::fromStdString(m_games[itemID].name)
.remove(QRegularExpression("[\\\\/:*?\"<>|]")) +
".desktop";
#endif
// Convert the icon to .ico if necessary
if (iconFileInfo.suffix().toLower() == "png") {
// Convert icon from PNG to ICO
if (convertPngToIco(iconPath, icoPath)) {
#ifdef Q_OS_WIN
if (createShortcutWin(linkPath, ebootPath, icoPath, exePath)) {
#else
if (createShortcutLinux(linkPath, ebootPath, iconPath)) {
#endif
QMessageBox::information(
nullptr, "Shortcut Creation",
QString("Shortcut created successfully:\n %1").arg(linkPath));
} else {
QMessageBox::critical(
nullptr, "Error",
QString("Error creating shortcut:\n %1").arg(linkPath));
}
} else {
QMessageBox::critical(nullptr, "Error", "Failed to convert icon.");
}
} else {
// If the icon is already in ICO format, we just create the shortcut
#ifdef Q_OS_WIN
if (createShortcutWin(linkPath, ebootPath, iconPath, exePath)) {
#else
if (createShortcutLinux(linkPath, ebootPath, iconPath)) {
#endif
QMessageBox::information(
nullptr, "Shortcut Creation",
QString("Shortcut created successfully:\n %1").arg(linkPath));
} else {
QMessageBox::critical(nullptr, "Error",
QString("Error creating shortcut:\n %1").arg(linkPath));
}
}
}
}
int GetRowIndex(QTreeWidget* treeWidget, QTreeWidgetItem* item) {
@ -155,4 +241,88 @@ public:
InstallDragDropPkg(path, 1, 1);
}
}
private:
bool convertPngToIco(const QString& pngFilePath, const QString& icoFilePath) {
// Load the PNG image
QImage image(pngFilePath);
if (image.isNull()) {
return false;
}
// Scale the image to the default icon size (256x256 pixels)
QImage scaledImage =
image.scaled(QSize(256, 256), Qt::KeepAspectRatio, Qt::SmoothTransformation);
// Convert the image to QPixmap
QPixmap pixmap = QPixmap::fromImage(scaledImage);
// Save the pixmap as an ICO file
if (pixmap.save(icoFilePath, "ICO")) {
return true;
} else {
return false;
}
}
#ifdef Q_OS_WIN
bool createShortcutWin(const QString& linkPath, const QString& targetPath,
const QString& iconPath, const QString& exePath) {
CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED);
// Create the ShellLink object
IShellLink* pShellLink = nullptr;
HRESULT hres = CoCreateInstance(CLSID_ShellLink, nullptr, CLSCTX_INPROC_SERVER,
IID_IShellLink, (LPVOID*)&pShellLink);
if (SUCCEEDED(hres)) {
// Defines the path to the program executable
pShellLink->SetPath((LPCWSTR)exePath.utf16());
// Sets the home directory ("Start in")
pShellLink->SetWorkingDirectory((LPCWSTR)QFileInfo(exePath).absolutePath().utf16());
// Set arguments, eboot.bin file location
QString arguments = QString("\"%1\"").arg(targetPath);
pShellLink->SetArguments((LPCWSTR)arguments.utf16());
// Set the icon for the shortcut
pShellLink->SetIconLocation((LPCWSTR)iconPath.utf16(), 0);
// Save the shortcut
IPersistFile* pPersistFile = nullptr;
hres = pShellLink->QueryInterface(IID_IPersistFile, (LPVOID*)&pPersistFile);
if (SUCCEEDED(hres)) {
hres = pPersistFile->Save((LPCWSTR)linkPath.utf16(), TRUE);
pPersistFile->Release();
}
pShellLink->Release();
}
CoUninitialize();
return SUCCEEDED(hres);
}
#else
bool createShortcutLinux(const QString& linkPath, const QString& targetPath,
const QString& iconPath) {
QFile shortcutFile(linkPath);
if (!shortcutFile.open(QIODevice::WriteOnly | QIODevice::Text)) {
QMessageBox::critical(nullptr, "Error",
QString("Error creating shortcut:\n %1").arg(linkPath));
return false;
}
QTextStream out(&shortcutFile);
out << "[Desktop Entry]\n";
out << "Version=1.0\n";
out << "Name=" << QFileInfo(linkPath).baseName() << "\n";
out << "Exec=" << QCoreApplication::applicationFilePath() << " \"" << targetPath << "\"\n";
out << "Icon=" << iconPath << "\n";
out << "Terminal=false\n";
out << "Type=Application\n";
shortcutFile.close();
return true;
}
#endif
};

View File

@ -8,10 +8,16 @@
#include "qt_gui/game_install_dialog.h"
#include "qt_gui/main_window.h"
#include <emulator.h>
#include <fmt/core.h>
// Custom message handler to ignore Qt logs: all three arguments (message type, log context and
// message text) are discarded and nothing is printed. It is installed in main() through
// qInstallMessageHandler.
void customMessageHandler(QtMsgType, const QMessageLogContext&, const QString&) {}
int main(int argc, char* argv[]) {
QApplication a(argc, argv);
// Load configurations and initialize Qt application
const auto config_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
Config::load(config_dir / "config.toml");
QString gameDataPath = qApp->applicationDirPath() + "/game_data/";
@ -23,14 +29,25 @@ int main(int argc, char* argv[]) {
#endif
std::filesystem::create_directory(path);
// Check if the game install directory is set
if (Config::getGameInstallDir() == "") {
GameInstallDialog dlg;
dlg.exec();
}
qInstallMessageHandler(customMessageHandler); // ignore qt logs.
// Ignore Qt logs
qInstallMessageHandler(customMessageHandler);
// Initialize the main window
MainWindow* m_main_window = new MainWindow(nullptr);
m_main_window->Init();
// Check for command line arguments
if (argc > 1) {
Core::Emulator emulator;
emulator.Run(argv[1]);
}
// Run the Qt application
return a.exec();
}
}

View File

@ -135,15 +135,33 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(index)};
if (!ValidId(param.id)) {
// Attribute is disabled or varying component is not written
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
}
if (param.num_components > 1) {
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
return ctx.OpLoad(param.component_type, pointer);
if (param.buffer_handle < 0) {
if (!ValidId(param.id)) {
// Attribute is disabled or varying component is not written
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
}
if (param.num_components > 1) {
const Id pointer{
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
return ctx.OpLoad(param.component_type, pointer);
} else {
return ctx.OpLoad(param.component_type, param.id);
}
} else {
return ctx.OpLoad(param.component_type, param.id);
const auto rate_idx = param.id.value == 0 ? ctx.u32_zero_value : ctx.u32_one_value;
const auto step_rate = ctx.OpLoad(
ctx.U32[1],
ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
ctx.instance_step_rates, rate_idx));
const auto offset = ctx.OpIAdd(
ctx.U32[1],
ctx.OpIMul(
ctx.U32[1],
ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
ctx.ConstU32(param.num_components)),
ctx.ConstU32(comp));
return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
}
}
switch (attr) {

View File

@ -51,7 +51,11 @@ Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
}
Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {
Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy) {
if (is_legacy) {
return ctx.OpNMax(ctx.F32[1], a, b);
}
return ctx.OpFMax(ctx.F32[1], a, b);
}
@ -59,7 +63,11 @@ Id EmitFPMax64(EmitContext& ctx, Id a, Id b) {
return ctx.OpFMax(ctx.F64[1], a, b);
}
Id EmitFPMin32(EmitContext& ctx, Id a, Id b) {
Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy) {
if (is_legacy) {
return ctx.OpNMin(ctx.F32[1], a, b);
}
return ctx.OpFMin(ctx.F32[1], a, b);
}

View File

@ -165,9 +165,9 @@ Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
Id EmitFPMax32(EmitContext& ctx, Id a, Id b);
Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
Id EmitFPMin32(EmitContext& ctx, Id a, Id b);
Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);

View File

@ -171,17 +171,47 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
void EmitContext::DefineInputs(const Info& info) {
switch (stage) {
case Stage::Vertex:
case Stage::Vertex: {
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
// Create push constants block for instance steps rates
const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")};
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "sr0");
MemberName(struct_type, 1, "sr1");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U);
instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant);
Name(instance_step_rates, "step_rates");
interfaces.push_back(instance_step_rates);
for (const auto& input : info.vs_inputs) {
const Id type{GetAttributeType(*this, input.fmt)};
const Id id{DefineInput(type, input.binding)};
Name(id, fmt::format("vs_in_attr{}", input.binding));
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id);
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
const u32 rate_idx =
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0
: 1;
// Note that we pass index rather than Id
input_params[input.binding] = {
rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf,
};
} else {
Id id{DefineInput(type, input.binding)};
if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) {
Name(id, fmt::format("vs_instance_attr{}", input.binding));
} else {
Name(id, fmt::format("vs_in_attr{}", input.binding));
}
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
interfaces.push_back(id);
}
}
break;
}
case Stage::Fragment:
if (info.uses_group_quad) {
subgroup_local_invocation_id = DefineVariable(
@ -276,7 +306,10 @@ void EmitContext::DefineBuffers(const Info& info) {
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) {
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
const auto name =
fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
buffer.is_instance_data
? fmt::format("{}_instance_data{}_{}{}", stage, i, 'f',
sizeof(float) * CHAR_BIT)
: fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT);
Name(struct_type, name);
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data");
@ -317,6 +350,14 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::Rg32f;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
return spv::ImageFormat::Rg32ui;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32_32_32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
return spv::ImageFormat::Rgba32ui;
}
if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
return spv::ImageFormat::R16f;

View File

@ -165,6 +165,8 @@ public:
Id output_position{};
Id vertex_index{};
Id instance_id{};
Id instance_step_rates{};
Id base_vertex{};
Id frag_coord{};
Id front_facing{};
@ -214,6 +216,7 @@ public:
Id pointer_type;
Id component_type;
u32 num_components;
s32 buffer_handle{-1};
};
std::array<SpirvAttribute, 32> input_params{};
std::array<SpirvAttribute, 32> output_params{};

View File

@ -235,9 +235,22 @@ void Translator::EmitFetch(const GcnInst& inst) {
ir.SetVectorReg(dst_reg++, comp);
}
if (attrib.instance_data == 2 || attrib.instance_data == 3) {
LOG_WARNING(Render_Recompiler, "Unsupported instance step rate = {}",
attrib.instance_data);
// In case of programmable step rates we need to fallback to instance data pulling in
// shader, so VBs should be bound as regular data buffers
s32 instance_buf_handle = -1;
const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
if (step_rate == Info::VsInput::OverStepRate0 ||
step_rate == Info::VsInput::OverStepRate1) {
info.buffers.push_back({
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
.stride = buffer.GetStride(),
.num_records = buffer.num_records,
.used_types = IR::Type::F32,
.is_storage = true, // we may not fit into UBO with large meshes
.is_instance_data = true,
});
instance_buf_handle = s32(info.buffers.size() - 1);
}
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
@ -247,7 +260,8 @@ void Translator::EmitFetch(const GcnInst& inst) {
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
.instance_step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data),
.instance_step_rate = step_rate,
.instance_data_buf = instance_buf_handle,
});
}
}
@ -625,6 +639,9 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::V_MIN3_F32:
translator.V_MIN3_F32(inst);
break;
case Opcode::V_MIN_LEGACY_F32:
translator.V_MIN_F32(inst, true);
break;
case Opcode::V_MADMK_F32:
translator.V_MADMK_F32(inst);
break;
@ -875,6 +892,9 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::V_MAD_LEGACY_F32:
translator.V_MAD_F32(inst);
break;
case Opcode::V_MAX_LEGACY_F32:
translator.V_MAX_F32(inst, true);
break;
case Opcode::V_RSQ_LEGACY_F32:
case Opcode::V_RSQ_CLAMP_F32:
translator.V_RSQ_F32(inst);

View File

@ -111,14 +111,14 @@ public:
void V_RCP_F32(const GcnInst& inst);
void V_FMA_F32(const GcnInst& inst);
void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
void V_MAX_F32(const GcnInst& inst);
void V_MAX_F32(const GcnInst& inst, bool is_legacy = false);
void V_MAX_U32(bool is_signed, const GcnInst& inst);
void V_RSQ_F32(const GcnInst& inst);
void V_SIN_F32(const GcnInst& inst);
void V_LOG_F32(const GcnInst& inst);
void V_EXP_F32(const GcnInst& inst);
void V_SQRT_F32(const GcnInst& inst);
void V_MIN_F32(const GcnInst& inst);
void V_MIN_F32(const GcnInst& inst, bool is_legacy = false);
void V_MIN3_F32(const GcnInst& inst);
void V_MADMK_F32(const GcnInst& inst);
void V_CUBEMA_F32(const GcnInst& inst);
@ -133,7 +133,7 @@ public:
void V_MUL_HI_U32(bool is_signed, const GcnInst& inst);
void V_SAD_U32(const GcnInst& inst);
void V_BFE_U32(bool is_signed, const GcnInst& inst);
void V_MAD_I32_I24(const GcnInst& inst);
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true);
void V_MUL_I32_I24(const GcnInst& inst);
void V_SUB_I32(const GcnInst& inst);
void V_LSHR_B32(const GcnInst& inst);

View File

@ -203,10 +203,10 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
}
}
void Translator::V_MAX_F32(const GcnInst& inst) {
void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
SetDst(inst.dst[0], ir.FPMax(src0, src1));
SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy));
}
void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) {
@ -240,10 +240,10 @@ void Translator::V_SQRT_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPSqrt(src0));
}
// Translates V_MIN_F32: reads src[0] and src[1] as F32 values, emits an FPMin
// on them and stores the result in dst[0].
//
// `is_legacy` is forwarded unchanged to ir.FPMin.
//
// NOTE(review): this listing carries two signature lines and two SetDst lines
// (one-argument form and `is_legacy` form); presumably only the `is_legacy`
// pair is meant to be live -- confirm.
void Translator::V_MIN_F32(const GcnInst& inst) {
void Translator::V_MIN_F32(const GcnInst& inst, bool is_legacy) {
// The `true` passed to GetSrc presumably requests a float-typed read -- confirm.
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
SetDst(inst.dst[0], ir.FPMin(src0, src1));
SetDst(inst.dst[0], ir.FPMin(src0, src1, is_legacy));
}
void Translator::V_MIN3_F32(const GcnInst& inst) {
@ -361,9 +361,11 @@ void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed));
}
// Translates the 24-bit multiply-add: extracts bits [0, 24) from src[0] and
// src[1], multiplies them, adds the full 32-bit src[2] and stores the sum in
// dst[0].
//
// `is_signed` is handed to BitFieldExtract as its last argument (presumably
// choosing sign- versus zero-extension of the 24-bit field -- confirm).
//
// NOTE(review): this listing carries both the one-argument form (extraction
// flag hard-coded to `true`) and the `is_signed` form, including two
// definitions each of src0 and src1; presumably only the `is_signed` form is
// meant to be live -- confirm.
void Translator::V_MAD_I32_I24(const GcnInst& inst) {
const IR::U32 src0{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), true)};
const IR::U32 src1{ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(24), true)};
void Translator::V_MAD_I32_I24(const GcnInst& inst, bool is_signed) {
const IR::U32 src0{
ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), is_signed)};
const IR::U32 src1{
ir.BitFieldExtract(GetSrc(inst.src[1]), ir.Imm32(0), ir.Imm32(24), is_signed)};
// The addend is used as-is, without any 24-bit extraction.
const IR::U32 src2{GetSrc(inst.src[2])};
SetDst(inst.dst[0], ir.IAdd(ir.IMul(src0, src1), src2));
}
@ -393,8 +395,7 @@ void Translator::V_ASHRREV_I32(const GcnInst& inst) {
}
// Translates V_MAD_U32_U24 by delegating to V_MAD_I32_I24.
//
// NOTE(review): two delegating calls are listed. The first uses the default
// (signed) extraction, the second passes `false` to request unsigned
// extraction. Executing both would emit the multiply-add twice, so presumably
// only the `false` call is meant to be live -- confirm.
void Translator::V_MAD_U32_U24(const GcnInst& inst) {
// TODO:
V_MAD_I32_I24(inst);
V_MAD_I32_I24(inst, false);
}
void Translator::V_RNDNE_F32(const GcnInst& inst) {
@ -518,7 +519,38 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
}
// Translates V_CMP_CLASS_F32 for the two class masks handled here.
//
// src[1] is a bit mask of floating-point classes (bit layout in the constants
// below) and src[0] is the value being classified. Only an immediate mask is
// supported: a mask with both NaN bits set becomes an FPIsNan test, a mask
// with both infinity bits set becomes an FPIsInf test. The boolean result is
// written to VCC, and only when dst[1] is VccLo. Every other combination hits
// UNREACHABLE().
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
// NOTE(review): this unconditional UNREACHABLE() sits ahead of the
// implementation and looks like the former stub body; as listed it would fire
// before any of the code below runs -- confirm it is not meant to remain.
UNREACHABLE();
// One bit per floating-point class, bits 0 through 9 of the mask operand.
constexpr u32 SIGNALING_NAN = 1 << 0;
constexpr u32 QUIET_NAN = 1 << 1;
constexpr u32 NEGATIVE_INFINITY = 1 << 2;
// The normal/denormal/zero bits are declared for completeness but are not
// referenced by the checks below.
constexpr u32 NEGATIVE_NORMAL = 1 << 3;
constexpr u32 NEGATIVE_DENORM = 1 << 4;
constexpr u32 NEGATIVE_ZERO = 1 << 5;
constexpr u32 POSITIVE_ZERO = 1 << 6;
constexpr u32 POSITIVE_DENORM = 1 << 7;
constexpr u32 POSITIVE_NORMAL = 1 << 8;
constexpr u32 POSITIVE_INFINITY = 1 << 9;
const IR::F32F64 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
// The mask must be known at translation time; a non-immediate mask is rejected.
if (src1.IsImmediate()) {
const u32 class_mask = src1.U32();
IR::U1 value;
// NOTE(review): each test only requires that its pair of bits is set and does
// not look at the remaining mask bits, so a mask that also selects other
// classes is still reduced to a plain NaN (or Inf) test; the NaN test also
// takes priority when both pairs are set -- confirm this is acceptable.
if ((class_mask & (SIGNALING_NAN | QUIET_NAN)) == (SIGNALING_NAN | QUIET_NAN)) {
value = ir.FPIsNan(src0);
} else if ((class_mask & (POSITIVE_INFINITY | NEGATIVE_INFINITY)) ==
(POSITIVE_INFINITY | NEGATIVE_INFINITY)) {
value = ir.FPIsInf(src0);
} else {
UNREACHABLE();
}
// Only a VCC destination is handled; any other dst[1] field is rejected.
if (inst.dst[1].field == OperandField::VccLo) {
return ir.SetVcc(value);
} else {
UNREACHABLE();
}
} else {
UNREACHABLE();
}
}
} // namespace Shader::Gcn

View File

@ -165,13 +165,14 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
if (!flags.test(MimgModifier::Pcf)) {
return ir.ImageGather(handle, body, offset, {}, info);
}
ASSERT(mimg.dmask & 1); // should be always 1st (R) component
return ir.ImageGatherDref(handle, body, offset, {}, dref, info);
}();
// For gather4 instructions dmask selects which component to read and must have
// only one bit set to 1
ASSERT_MSG(std::popcount(mimg.dmask) == 1, "Unexpected bits in gather dmask");
for (u32 i = 0; i < 4; i++) {
if (((mimg.dmask >> i) & 1) == 0) {
continue;
}
const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
ir.SetVectorReg(dest_reg++, value);
}

View File

@ -865,28 +865,35 @@ U1 IREmitter::FPUnordered(const F32F64& lhs, const F32F64& rhs) {
return LogicalOr(FPIsNan(lhs), FPIsNan(rhs));
}
// Emits a floating-point maximum of `lhs` and `rhs`.
//
// Both operands must have the same type; a mismatch hits UNREACHABLE_MSG.
// F32 operands produce an FPMax32 instruction, F64 operands an FPMax64
// instruction, and any other type is reported through ThrowInvalidType.
//
// `is_legacy` is attached to the FPMax32 instruction as an extra operand. It
// is rejected for F64 operands.
//
// NOTE(review): this listing carries two signature lines and two F32 return
// lines (with and without `is_legacy`); presumably only the `is_legacy` pair
// is meant to be live -- confirm.
F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs) {
F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy) {
if (lhs.Type() != rhs.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::F32:
return Inst<F32>(Opcode::FPMax32, lhs, rhs);
return Inst<F32>(Opcode::FPMax32, lhs, rhs, is_legacy);
case Type::F64:
// FPMax64 takes no legacy operand, so the flag cannot be honoured here.
if (is_legacy) {
UNREACHABLE_MSG("F64 cannot be used with LEGACY ops");
}
return Inst<F64>(Opcode::FPMax64, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs) {
F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, bool is_legacy) {
if (lhs.Type() != rhs.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::F32:
return Inst<F32>(Opcode::FPMin32, lhs, rhs);
return Inst<F32>(Opcode::FPMin32, lhs, rhs, is_legacy);
case Type::F64:
if (is_legacy) {
UNREACHABLE_MSG("F64 cannot be used with LEGACY ops");
}
return Inst<F64>(Opcode::FPMin64, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());

View File

@ -149,8 +149,8 @@ public:
// Floating-point classification and comparison helpers; each returns a U1.
[[nodiscard]] U1 FPIsInf(const F32F64& value);
[[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
// NOTE(review): FPMax and FPMin are each listed twice, without and with the
// defaulted `is_legacy` flag. Declaring both forms would make a two-argument
// call ambiguous, so presumably only the defaulted forms are meant to be
// live -- confirm.
[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs);
// `is_legacy` defaults to false, so existing two-argument callers keep working.
[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false);
[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false);
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
[[nodiscard]] Value IAddCary(const U32& a, const U32& b);

View File

@ -154,9 +154,9 @@ OPCODE(FPAdd64, F64, F64,
// Floating-point opcode rows. Columns are presumably: opcode name, result
// type, then operand types -- confirm against the OPCODE macro definition.
OPCODE(FPSub32, F32, F32, F32, )
OPCODE(FPFma32, F32, F32, F32, F32, )
OPCODE(FPFma64, F64, F64, F64, F64, )
// NOTE(review): FPMax32 and FPMin32 are each listed twice, without and with a
// trailing U1 operand; presumably only the U1 rows are meant to be live
// (duplicate opcode names would clash) -- confirm.
OPCODE(FPMax32, F32, F32, F32, )
OPCODE(FPMax32, F32, F32, F32, U1, )
OPCODE(FPMax64, F64, F64, F64, )
OPCODE(FPMin32, F32, F32, F32, )
OPCODE(FPMin32, F32, F32, F32, U1, )
OPCODE(FPMin64, F64, F64, F64, )
OPCODE(FPMul32, F32, F32, F32, )
OPCODE(FPMul64, F64, F64, F64, )

View File

@ -77,7 +77,8 @@ struct BufferResource {
u32 num_records;
IR::Type used_types;
AmdGpu::Buffer inline_cbuf;
bool is_storage;
bool is_storage{false};
bool is_instance_data{false};
constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept;
};
@ -116,6 +117,7 @@ struct Info {
u8 sgpr_base;
u8 dword_offset;
InstanceIdType instance_step_rate;
s32 instance_data_buf;
};
boost::container::static_vector<VsInput, 32> vs_inputs{};

View File

@ -66,21 +66,10 @@ void Liverpool::Process(std::stop_token stoken) {
}
}
if (submit_done) {
std::scoped_lock lk{submit_mutex};
submit_cv.notify_all();
submit_done = false;
}
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GpuIdle);
}
}
// Blocks the caller until no submissions remain outstanding, i.e. until
// `num_submits` is observed to be zero under `submit_mutex`.
void Liverpool::WaitGpuIdle() {
    RENDERER_TRACE;
    // Wake-up condition evaluated by the condition variable while the lock is held.
    const auto no_pending_submits = [this] { return num_submits == 0; };
    std::unique_lock lock{submit_mutex};
    submit_cv.wait(lock, no_pending_submits);
}
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
TracyFiberEnter(ccb_task_name);

View File

@ -887,7 +887,10 @@ struct Liverpool {
IndexBufferType index_buffer_type;
INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2);
u32 enable_primitive_id;
INSERT_PADDING_WORDS(0xA2DF - 0xA2A1 - 1);
INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1);
u32 vgt_instance_step_rate_0;
u32 vgt_instance_step_rate_1;
INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1);
PolygonOffset poly_offset;
INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5);
AaConfig aa_config;
@ -937,18 +940,10 @@ public:
// Submits a graphics workload described by two command word streams, `dcb`
// and `ccb` (presumably the draw and constant command buffers -- confirm).
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
// Submits the command word stream `acb` on the queue identified by `vqid`.
void SubmitAsc(u32 vqid, std::span<const u32> acb);
// Waits until the GPU has no outstanding submissions.
void WaitGpuIdle();
// Reports whether there are currently no outstanding submissions.
bool IsGpuIdle() const {
    const bool no_pending_submits = (num_submits == 0);
    return no_pending_submits;
}
// Flags that a submission has completed and wakes every thread waiting on
// `submit_cv`; both steps happen while `submit_mutex` is held.
void NotifySubmitDone() {
    std::scoped_lock guard{submit_mutex};
    submit_done = true;
    submit_cv.notify_all();
}
// Stores the rasterizer pointer to use from now on; the pointer is kept
// as-is and is not owned by this object.
void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
    this->rasterizer = rasterizer_;
}
@ -1017,7 +1012,6 @@ private:
u32 num_submits{};
std::mutex submit_mutex;
std::condition_variable_any submit_cv;
std::atomic<bool> submit_done{};
};
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);
@ -1055,6 +1049,8 @@ static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
// Compile-time layout checks: each named field of the register block must
// land on its expected register index. They guard the INSERT_PADDING_WORDS
// arithmetic whenever fields are added or moved.
static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D);
static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
// The two instance step rate registers occupy consecutive indices.
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);

View File

@ -222,6 +222,8 @@ vk::CompareOp DepthCompare(AmdGpu::DepthCompare comp) {
return vk::CompareOp::eGreaterOrEqual;
case AmdGpu::DepthCompare::Always:
return vk::CompareOp::eAlways;
default:
UNREACHABLE();
}
}
@ -321,6 +323,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::FormatBc4 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc4UnormBlock;
}
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc5UnormBlock;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Sint) {
return vk::Format::eR16G16B16A16Sint;
@ -366,6 +371,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8Unorm;
}
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Snorm) {
return vk::Format::eR8G8Snorm;
}
if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc7UnormBlock;
}
@ -429,6 +437,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR16Unorm;
}
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR16G16B16A16Unorm;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}

View File

@ -30,12 +30,19 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
stages[i] = *infos[i];
}
BuildDescSetLayout();
const vk::PushConstantRange push_constants = {
.stageFlags = vk::ShaderStageFlagBits::eVertex,
.offset = 0,
.size = 2 * sizeof(u32),
};
const vk::DescriptorSetLayout set_layout = *desc_layout;
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 1U,
.pSetLayouts = &set_layout,
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_constants,
};
pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info);
@ -43,6 +50,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
const auto& vs_info = stages[0];
for (const auto& input : vs_info.vs_inputs) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
// Skip attribute binding as the data will be pulled by shader
continue;
}
const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
attributes.push_back({
.location = input.binding,
@ -420,6 +433,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const {
// Calculate buffers memory overlaps
boost::container::static_vector<BufferRange, MaxVertexBufferCount> ranges{};
for (const auto& input : vs_info.vs_inputs) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
continue;
}
const auto& buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
if (buffer.GetSize() == 0) {
continue;

View File

@ -67,20 +67,24 @@ public:
void BindResources(Core::MemoryManager* memory, StreamBuffer& staging,
VideoCore::TextureCache& texture_cache) const;
// Read-only accessors of the pipeline object.
//
// NOTE(review): most accessors below are listed with two header lines, one
// carrying [[nodiscard]] and one without it; presumably only one of each pair
// is meant to be live -- confirm.

// Returns the underlying Vulkan pipeline handle.
[[nodiscard]] vk::Pipeline Handle() const noexcept {
vk::Pipeline Handle() const noexcept {
return *pipeline;
}

[[nodiscard]] bool IsEmbeddedVs() const noexcept {
// Returns the Vulkan pipeline layout handle of this pipeline.
vk::PipelineLayout GetLayout() const {
return *pipeline_layout;
}

// Reports whether the first stage hash of the key equals the fixed
// EmbeddedVsHash constant.
bool IsEmbeddedVs() const noexcept {
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
return key.stage_hashes[0] == EmbeddedVsHash;
}

// Returns the write masks stored in the pipeline key.
[[nodiscard]] auto GetWriteMasks() const {
auto GetWriteMasks() const {
return key.write_masks;
}

// Returns the depth-enable bit stored in the pipeline key.
[[nodiscard]] bool IsDepthEnabled() const {
bool IsDepthEnabled() const {
return key.depth.depth_enable.Value();
}

View File

@ -52,16 +52,36 @@ Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
LOG_INFO(Render_Vulkan, "Found {} physical devices", num_physical_devices);
if (physical_device_index < 0) {
std::vector<std::pair<size_t, vk::PhysicalDeviceProperties2>> properties2{};
std::vector<
std::tuple<size_t, vk::PhysicalDeviceProperties2, vk::PhysicalDeviceMemoryProperties>>
properties2{};
for (auto const& physical : physical_devices) {
properties2.emplace_back(properties2.size(), physical.getProperties2());
properties2.emplace_back(properties2.size(), physical.getProperties2(),
physical.getMemoryProperties());
}
std::sort(properties2.begin(), properties2.end(), [](const auto& left, const auto& right) {
if (std::get<1>(left).properties.deviceType ==
std::get<1>(right).properties.deviceType) {
const vk::PhysicalDeviceProperties& left_prop = std::get<1>(left).properties;
const vk::PhysicalDeviceProperties& right_prop = std::get<1>(right).properties;
if (left_prop.apiVersion >= TargetVulkanApiVersion &&
right_prop.apiVersion < TargetVulkanApiVersion) {
return true;
}
return std::get<1>(left).properties.deviceType == vk::PhysicalDeviceType::eDiscreteGpu;
if (left_prop.deviceType != right_prop.deviceType) {
return left_prop.deviceType == vk::PhysicalDeviceType::eDiscreteGpu;
}
constexpr auto get_mem = [](const vk::PhysicalDeviceMemoryProperties& mem) -> size_t {
size_t max = 0;
for (u32 i = 0; i < mem.memoryHeapCount; i++) {
const vk::MemoryHeap& heap = mem.memoryHeaps[i];
if (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal && heap.size > max) {
max = heap.size;
}
}
return max;
};
size_t left_mem_size = get_mem(std::get<2>(left));
size_t right_mem_size = get_mem(std::get<2>(right));
return left_mem_size > right_mem_size;
});
physical_device = physical_devices[std::get<0>(properties2[0])];
} else {

View File

@ -183,7 +183,7 @@ void PipelineCache::RefreshGraphicsKey() {
int remapped_cb{};
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
auto const& col_buf = regs.color_buffers[cb];
if (!col_buf || skip_cb_binding) {
if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) {
continue;
}
const auto base_format =

View File

@ -54,6 +54,13 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
UpdateDynamicState(*pipeline);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
const u32 step_rates[] = {
regs.vgt_instance_step_rate_0,
regs.vgt_instance_step_rate_1,
};
cmdbuf.pushConstants(pipeline->GetLayout(), vk::ShaderStageFlagBits::eVertex, 0u,
sizeof(step_rates), &step_rates);
if (is_indexed) {
cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0);
} else {
@ -99,6 +106,12 @@ void Rasterizer::BeginRendering() {
continue;
}
// If the color buffer is still bound but rendering to it is disabled by the target mask,
// we need to prevent the render area from being affected by unbound render target extents.
if (!regs.color_target_mask.GetMask(col_buf_id)) {
continue;
}
const auto& hint = liverpool->last_cb_extent[col_buf_id];
const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
const auto& image = texture_cache.GetImage(image_view.image_id);

View File

@ -189,10 +189,14 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eR32Uint:
return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaUnormBlock:
case vk::Format::eBc4UnormBlock:
case vk::Format::eR32G32Sfloat:
return vk::Format::eR32G32Uint;
case vk::Format::eBc2SrgbBlock:
case vk::Format::eBc2UnormBlock:
case vk::Format::eBc3SrgbBlock:
case vk::Format::eBc3UnormBlock:
case vk::Format::eBc5UnormBlock:
case vk::Format::eBc7SrgbBlock:
case vk::Format::eBc7UnormBlock:
return vk::Format::eR32G32B32A32Uint;