ir3: Fix bug with shaders that only exit via discard
authorConnor Abbott <cwabbott0@gmail.com>
Tue, 21 Apr 2020 09:36:40 +0000 (11:36 +0200)
committerMarge Bot <eric+marge@anholt.net>
Wed, 22 Apr 2020 09:49:40 +0000 (09:49 +0000)
discard is supposed to be a terminator, killing the thread, so that it's
possible to exit main solely by a discard e.g. inside of an infinite
loop. However, it currently isn't treated as a terminator in NIR due to
workarounds turning it into demote (d3d-style kill) and even if that
were fixed, we probably wouldn't want to treat discard_if as a jump
since otherwise the scheduler wouldn't be able to schedule things around
it. So, add this workaround which inserts jump instructions as
necessary to guarantee that the program always terminates.

This fixes a hang in dEQP-VK.graphicsfuzz.while-inside-switch, which
conditionally does a discard inside an infinite loop.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4658>

src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_legalize.c

index 6cb882002f04530588319f27e33a7b9a201a52e6..441758882008e45b4045af0f23842d7816d9b74a 100644 (file)
@@ -1168,6 +1168,8 @@ static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n)
        list_for_each_entry(struct ir3_block, __block, __list, node)
 #define foreach_block_safe(__block, __list) \
        list_for_each_entry_safe(struct ir3_block, __block, __list, node)
+#define foreach_block_rev(__block, __list) \
+       list_for_each_entry_rev(struct ir3_block, __block, __list, node)
 
 /* iterators for arrays: */
 #define foreach_array(__array, __list) \
index bed8cd1c03a8df9c88bd31ea73f8f9200f08ef84..d0b0dd83b46e3abdc3f59f0f4f3ff3dff4d93f97 100644 (file)
@@ -592,6 +592,73 @@ block_sched(struct ir3 *ir)
        }
 }
 
+/* Here we workaround the fact that kill doesn't actually kill the thread as
+ * GL expects. The last instruction always needs to be an end instruction,
+ * which means that if we're stuck in a loop where kill is the only way out,
+ * then we may have to jump out to the end. kill may also have the d3d
+ * semantics of converting the thread to a helper thread, rather than setting
+ * the exec mask to 0, in which case the helper thread could get stuck in an
+ * infinite loop.
+ *
+ * We do this late, both to give the scheduler the opportunity to reschedule
+ * kill instructions earlier and to avoid having to create a separate basic
+ * block.
+ *
+ * TODO: Assuming that the wavefront doesn't stop as soon as all threads are
+ * killed, we might benefit by doing this more aggressively when the remaining
+ * part of the program after the kill is large, since that would let us
+ * skip over the instructions when there are no non-killed threads left.
+ */
+static void
+kill_sched(struct ir3 *ir, struct ir3_shader_variant *so)
+{
+       /* True if we know that this block will always eventually lead to the end
+        * block:
+        */
+       bool always_ends = true;
+       bool added = false;
+       struct ir3_block *last_block =
+               list_last_entry(&ir->block_list, struct ir3_block, node);
+
+       foreach_block_rev (block, &ir->block_list) {
+               for (unsigned i = 0; i < 2 && block->successors[i]; i++) {
+                       if (block->successors[i]->start_ip <= block->end_ip)
+                               always_ends = false;
+               }
+
+               if (always_ends)
+                       continue;
+
+               foreach_instr_safe (instr, &block->instr_list) {
+                       if (instr->opc != OPC_KILL)
+                               continue;
+
+                       struct ir3_instruction *br = ir3_instr_create(block, OPC_BR);
+                       br->regs[1] = instr->regs[1];
+                       br->cat0.target =
+                               list_last_entry(&ir->block_list, struct ir3_block, node);
+
+                       list_del(&br->node);
+                       list_add(&br->node, &instr->node);
+
+                       added = true;
+               }
+       }
+
+       if (added) {
+               /* I'm not entirely sure how the branchstack works, but we probably
+                * need to add at least one entry for the divergence which is resolved
+                * at the end:
+                */
+               so->branchstack++;
+
+               /* We don't update predecessors/successors, so we have to do this
+                * manually:
+                */
+               mark_jp(last_block);
+       }
+}
+
 /* Insert nop's required to make this a legal/valid shader program: */
 static void
 nop_sched(struct ir3 *ir)
@@ -669,6 +736,8 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
        *max_bary = ctx->max_bary;
 
        block_sched(ir);
+       if (so->type == MESA_SHADER_FRAGMENT)
+               kill_sched(ir, so);
        nop_sched(ir);
 
        do {