Handle immediate inline samplers (#3015)

* Handle immediate inline sampler

* Simplify inline sampler handling
This commit is contained in:
Marcin Mikołajczyk
2025-06-17 08:42:14 +02:00
committed by GitHub
parent 9dd35c3a42
commit efa8f6a154
7 changed files with 54 additions and 38 deletions

View File

@@ -1964,9 +1964,9 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c
Value IREmitter::ImageSampleRaw(const Value& handle, const Value& address1, const Value& address2,
const Value& address3, const Value& address4,
TextureInstInfo info) {
return Inst(Opcode::ImageSampleRaw, Flags{info}, handle, address1, address2, address3,
address4);
const Value& inline_sampler, TextureInstInfo info) {
return Inst(Opcode::ImageSampleRaw, Flags{info}, handle, address1, address2, address3, address4,
inline_sampler);
}
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,

View File

@@ -349,7 +349,8 @@ public:
[[nodiscard]] Value ImageSampleRaw(const Value& handle, const Value& address1,
const Value& address2, const Value& address3,
const Value& address4, TextureInstInfo info);
const Value& address4, const Value& inline_sampler,
TextureInstInfo info);
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body,
const F32& bias, const Value& offset,

View File

@@ -412,7 +412,7 @@ OPCODE(ConvertU8U32, U8, U32,
OPCODE(ConvertU32U8, U32, U8, )
// Image operations
OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, )
OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, Opaque, )
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, F32x4, F32, Opaque, )
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, F32, Opaque, )
OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )

View File

@@ -168,7 +168,7 @@ public:
u32 Add(const SamplerResource& desc) {
const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) {
return desc.sharp_idx == existing.sharp_idx;
return desc.sampler == existing.sampler;
})};
return index;
}
@@ -351,8 +351,7 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
const auto opcode = inst->GetOpcode();
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
if (opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
opcode == IR::Opcode::GetUserData) {
return inst;
}
@@ -360,9 +359,7 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
};
const auto result = IR::BreadthFirstSearch(&inst, pred);
ASSERT_MSG(result, "Unable to find image sharp source");
const IR::Inst* producer = result.value();
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
const IR::Inst* tsharp_handle = result.value();
// Read image sharp.
const auto tsharp = TrackSharp(tsharp_handle, info);
@@ -427,29 +424,32 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
// Read sampler sharp.
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
const IR::Value& handle = producer->Arg(1);
const auto sampler_binding = [&] -> u32 {
const auto sampler = inst.Arg(5).InstRecursive();
ASSERT(sampler && sampler->GetOpcode() == IR::Opcode::CompositeConstructU32x4);
const auto handle = sampler->Arg(0);
// Inline sampler resource.
if (handle.IsImmediate()) {
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = std::numeric_limits<u32>::max(),
.inline_sampler = inline_sampler,
});
return {binding, inline_sampler};
LOG_DEBUG(Render_Vulkan, "Inline sampler detected");
const auto [s1, s2, s3, s4] =
std::tuple{sampler->Arg(0), sampler->Arg(1), sampler->Arg(2), sampler->Arg(3)};
ASSERT(s1.IsImmediate() && s2.IsImmediate() && s3.IsImmediate() &&
s4.IsImmediate());
const auto inline_sampler = AmdGpu::Sampler{
.raw0 = u64(s2.U32()) << 32 | u64(s1.U32()),
.raw1 = u64(s4.U32()) << 32 | u64(s3.U32()),
};
const auto binding = descriptors.Add(SamplerResource{inline_sampler});
return binding;
} else {
// Normal sampler resource.
const auto ssharp_handle = handle.InstRecursive();
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
const auto ssharp = TrackSharp(ssharp_ud, info);
const auto binding =
descriptors.Add(SamplerResource{ssharp, image_binding, disable_aniso});
return binding;
}
// Normal sampler resource.
const auto ssharp_handle = handle.InstRecursive();
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
const auto ssharp = TrackSharp(ssharp_ud, info);
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = ssharp,
.associated_image = image_binding,
.disable_aniso = disable_aniso,
});
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
}();
// Patch image and sampler handle.
inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16));