freedreno/ir3: fix potential gpu lockup with kill

author Rob Clark <robclark@freedesktop.org>

Sat, 18 Oct 2014 19:28:16 +0000 (15:28 -0400)

committer Rob Clark <robclark@freedesktop.org>

Tue, 21 Oct 2014 01:42:44 +0000 (21:42 -0400)
author Rob Clark <robclark@freedesktop.org>
Sat, 18 Oct 2014 19:28:16 +0000 (15:28 -0400)
committer Rob Clark <robclark@freedesktop.org>
Tue, 21 Oct 2014 01:42:44 +0000 (21:42 -0400)
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c

index 70d37ffdeb575659789702de8c8d0c08ff04d41e..60d4e4a15d5f0a51189fbcde09fce115e1ecdd60 100644 (file)
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -81,6 +81,8 @@ void ir3_destroy(struct ir3 *shader)
                 shader->chunk = chunk->next;
                 free(chunk);
         }
+       free(shader->instrs);
+       free(shader->baryfs);
         free(shader);
  }
  
@@ -596,6 +598,15 @@ static void insert_instr(struct ir3 *shader,
                                 shader->instrs_sz * sizeof(shader->instrs[0]));
         }
         shader->instrs[shader->instrs_count++] = instr;
+
+       if (is_input(instr)) {
+               if (shader->baryfs_count == shader->baryfs_sz) {
+                       shader->baryfs_sz = MAX2(2 * shader->baryfs_sz, 16);
+                       shader->baryfs = realloc(shader->baryfs,
+                                       shader->baryfs_sz * sizeof(shader->baryfs[0]));
+               }
+               shader->baryfs[shader->baryfs_count++] = instr;
+       }
  }
  
  struct ir3_block * ir3_block_create(struct ir3 *shader,
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h

index d2d3dcaadb93f9dccf503c7eef597bea48e57c3d..21992f68ced5c02e6fe738385f31d6fa09ea49ca 100644 (file)
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -210,7 +210,11 @@ struct ir3_instruction {
                  * result of moving a const to a reg would have a low cost,  so to
                  * it could make sense to duplicate the instruction at various
                  * points where the result is needed to reduce register footprint.
+                *
+                * DEPTH_UNUSED used to mark unused instructions after depth
+                * calculation pass.
                  */
+#define DEPTH_UNUSED  ~0
                 unsigned depth;
         };
         struct ir3_instruction *next;
@@ -224,6 +228,8 @@ struct ir3_heap_chunk;
  struct ir3 {
         unsigned instrs_count, instrs_sz;
         struct ir3_instruction **instrs;
+       unsigned baryfs_count, baryfs_sz;
+       struct ir3_instruction **baryfs;
         unsigned heap_idx;
         struct ir3_heap_chunk *chunk;
  };
@@ -272,6 +278,10 @@ static inline void ir3_clear_mark(struct ir3 *shader)
         /* TODO would be nice to drop the instruction array.. for
          * new compiler, _clear_mark() is all we use it for, and
          * we could probably manage a linked list instead..
+        *
+        * Also, we'll probably want to mark instructions within
+        * a block, so tracking the list of instrs globally is
+        * unlikely to be what we want.
          */
         unsigned i;
         for (i = 0; i < shader->instrs_count; i++) {
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c

index dcc0362f0c89c85c33b5409a986a11b527a83233..76413d415894a1e894cc6c60c653a50fb2913b09 100644 (file)
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -150,10 +150,22 @@ void ir3_block_depth(struct ir3_block *block)
                 if (block->outputs[i])
                         ir3_instr_depth(block->outputs[i]);
  
-       /* at this point, any unvisited input is unused: */
+       /* mark un-used instructions: */
+       for (i = 0; i < block->shader->instrs_count; i++) {
+               struct ir3_instruction *instr = block->shader->instrs[i];
+
+               /* just consider instructions within this block: */
+               if (instr->block != block)
+                       continue;
+
+               if (!ir3_instr_check_mark(instr))
+                       instr->depth = DEPTH_UNUSED;
+       }
+
+       /* cleanup unused inputs: */
         for (i = 0; i < block->ninputs; i++) {
                 struct ir3_instruction *in = block->inputs[i];
-               if (in && !ir3_instr_check_mark(in))
+               if (in && (in->depth == DEPTH_UNUSED))
                         block->inputs[i] = NULL;
         }
  }
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c

index 24d7c6397c1541d7eed16f524cf3f7aecf165db8..b2ef8111f56a5c0936465cf67505112edc5f16e6 100644 (file)
--- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c
@@ -242,6 +242,32 @@ static int trysched(struct ir3_sched_ctx *ctx,
         if (delay)
                 return delay;
  
+       /* if the instruction is a kill, we need to ensure *every*
+        * bary.f is scheduled.  The hw seems unhappy if the thread
+        * gets killed before the end-input (ei) flag is hit.
+        *
+        * We could do this by adding each bary.f instruction as
+        * virtual ssa src for the kill instruction.  But we have
+        * fixed length instr->regs[].
+        *
+        * TODO this wouldn't be quite right if we had multiple
+        * basic blocks, if any block was conditional.  We'd need
+        * to schedule the bary.f's outside of any block which
+        * was conditional that contained a kill.. I think..
+        */
+       if (is_kill(instr)) {
+               struct ir3 *ir = instr->block->shader;
+               unsigned i;
+
+               for (i = 0; i < ir->baryfs_count; i++) {
+                       if (ir->baryfs[i]->depth == DEPTH_UNUSED)
+                               continue;
+                       delay = trysched(ctx, ir->baryfs[i]);
+                       if (delay)
+                               return delay;
+               }
+       }
+
         /* if this is a write to address/predicate register, and that
          * register is currently in use, we need to defer until it is
          * free:
author	Rob Clark <robclark@freedesktop.org>
	Sat, 18 Oct 2014 19:28:16 +0000 (15:28 -0400)
committer	Rob Clark <robclark@freedesktop.org>
	Tue, 21 Oct 2014 01:42:44 +0000 (21:42 -0400)
src/gallium/drivers/freedreno/ir3/ir3.c		patch \| blob \| history
src/gallium/drivers/freedreno/ir3/ir3.h		patch \| blob \| history
src/gallium/drivers/freedreno/ir3/ir3_depth.c		patch \| blob \| history
src/gallium/drivers/freedreno/ir3/ir3_sched.c		patch \| blob \| history