Hack ReadLane to prevent device loss

This commit is contained in:
Marcin Mikołajczyk 2025-06-08 22:59:56 +01:00
parent ce42eccc9d
commit 3564ff8e03
3 changed files with 14 additions and 5 deletions

View File

@ -498,7 +498,7 @@ Id EmitLaneId(EmitContext& ctx);
// Emitter declarations: every function below takes the EmitContext and returns an Id.
Id EmitWarpId(EmitContext& ctx);
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
Id EmitReadFirstLane(EmitContext& ctx, Id value);
// NOTE(review): EmitReadLane is declared twice, differing only in the type of `lane`
// (u32 vs Id). Presumably the u32 form is the superseded declaration and only the
// Id form is meant to remain - confirm before relying on either overload.
Id EmitReadLane(EmitContext& ctx, Id value, u32 lane);
Id EmitReadLane(EmitContext& ctx, Id value, Id lane);
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding);
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding);

View File

@ -26,9 +26,18 @@ Id EmitReadFirstLane(EmitContext& ctx, Id value) {
return ctx.OpGroupNonUniformBroadcastFirst(ctx.U32[1], SubgroupScope(ctx), value);
}
Id EmitReadLane(EmitContext& ctx, Id value, u32 lane) {
return ctx.OpGroupNonUniformBroadcast(ctx.U32[1], SubgroupScope(ctx), value,
ctx.ConstU32(lane));
Id EmitReadLane(EmitContext& ctx, Id value, Id lane) {
    // HACK: `lane` is deliberately ignored for now. The direct translation would be
    //     ctx.OpGroupNonUniformBroadcast(ctx.U32[1], SubgroupScope(ctx), value, lane);
    // but, quoting the SPIR-V specification:
    //     The resulting value is undefined if Id is not part of the scope restricted
    //     tangle, or is greater than or equal to the size of the scope.
    // Such an undefined result may end in device loss.
    //
    // TODO: a proper implementation has to guarantee that `lane` is active in the
    //       subgroup: track the EXEC register more closely, extract the predicate in
    //       use, and pass it as a parameter to OpGroupNonUniformBallot.
    //
    // Until then, emit the same BroadcastFirst operation that EmitReadFirstLane uses.
    const auto result_type = ctx.U32[1];
    const auto scope = SubgroupScope(ctx);
    return ctx.OpGroupNonUniformBroadcastFirst(result_type, scope, value);
}
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) {

View File

@ -439,7 +439,7 @@ OPCODE(LaneId, U32,
// NOTE(review): each row looks like OPCODE(name, result type, operand types...) -
// confirm against the definition of the OPCODE macro, which is not visible here.
OPCODE(WarpId, U32, )
OPCODE(QuadShuffle, U32, U32, U32 )
OPCODE(ReadFirstLane, U32, U32, )
// NOTE(review): ReadLane is listed twice, with its last operand given as U32 and then
// as Opaque. Presumably the Opaque row supersedes the U32 row - confirm that only
// one of them is meant to remain.
OPCODE(ReadLane, U32, U32, U32 )
OPCODE(ReadLane, U32, U32, Opaque, )
OPCODE(WriteLane, U32, U32, U32, U32 )
OPCODE(DataAppend, U32, U32, U32 )
OPCODE(DataConsume, U32, U32, U32 )