freedreno/ir3: moar better scheduler
author Rob Clark <robdclark@gmail.com>
Tue, 7 Nov 2017 20:12:03 +0000 (15:12 -0500)
committer Rob Clark <robdclark@gmail.com>
Sun, 12 Nov 2017 17:28:59 +0000 (12:28 -0500)
Add a new pass that inserts additional dependencies, rather than simply
relying on the SSA srcs added in the nir->ir3 frontend.  This makes it
easier to deal with barriers, and the additional false deps also let us
properly ensure that a write depends on all previous reads.

Since conversion to barrier instructions is lossy (ie. just knowing the
instruction doesn't tell us enough about what other instructions the
barrier applies to), use barrier_class/barrier_conflict fields in the
ir3_instruction to retain this information.
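
For example (a minimal sketch only, restating the pieces added below in
ir3_compiler_nir.c and ir3_sched.c): an SSBO store is tagged as a buffer
write that conflicts with both buffer reads and buffer writes, and the
scheduler inserts a false dep whenever a later instruction's class
intersects an earlier instruction's conflict mask:

    /* frontend, when emitting the store: */
    stgb->barrier_class    = IR3_BARRIER_BUFFER_W;
    stgb->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;

    /* scheduler, deciding whether 'prior' must stay before 'instr': */
    static bool
    depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior)
    {
            if ((instr->barrier_class & IR3_BARRIER_EVERYTHING) ||
                            (prior->barrier_class & IR3_BARRIER_EVERYTHING))
                    return true;
            return !!(instr->barrier_class & prior->barrier_conflict);
    }

So a store picks up false deps on earlier loads (WAR) as well as earlier
stores (WAW), while two loads (class BUFFER_R, conflict BUFFER_W) never
get ordered against each other.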

This could probably be relaxed somewhat by considering *which* array/
buffer/image variable is being referenced.  E.g. a write to buffer A
could overtake a read from buffer B, as long as B is not declared
coherent.  (right?)
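
A hypothetical sketch of that relaxation (not part of this change;
'barrier_object' and 'coherent' are made-up fields standing in for
whatever per-object tracking would be needed) would be an extra
early-out in depends_on():

    /* hypothetical: accesses that provably touch different, non-coherent
     * objects would not need to be ordered against each other:
     */
    if ((instr->barrier_class & prior->barrier_conflict) &&
                    (instr->barrier_object != prior->barrier_object) &&
                    !instr->coherent && !prior->coherent)
            return false;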

Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/ir3/ir3.c
src/gallium/drivers/freedreno/ir3/ir3.h
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
src/gallium/drivers/freedreno/ir3/ir3_depth.c
src/gallium/drivers/freedreno/ir3/ir3_sched.c

index 6db0a2a20cda0050a4587b88b4f1ffcf8fec05a7..01a7bbc7dc6ce0e4580d0a33964fd04378de5646 100644 (file)
@@ -817,6 +817,12 @@ struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
        return new_instr;
 }
 
+/* Add a false dependency to 'instr', so that 'dep' gets scheduled before it: */
+void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep)
+{
+       array_insert(instr, instr->deps, dep);
+}
+
 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
                int num, int flags)
 {
index 90f8e3c44d3603f22f9e9a704cc3902de64f23b8..6ef0683ab00f07b8bb5d27ead310d5cf87ec0e7e 100644 (file)
@@ -326,6 +326,40 @@ struct ir3_instruction {
         */
        struct ir3_instruction *address;
 
+       /* Tracking for additional dependent instructions.  Used to handle
+        * barriers, WAR hazards for arrays/SSBOs/etc.
+        */
+       DECLARE_ARRAY(struct ir3_instruction *, deps);
+
+       /*
+        * From PoV of instruction scheduling, not execution (ie. ignores global/
+        * local distinction):
+        *                            shared  image  atomic  SSBO  everything
+        *   barrier()/            -   R/W     R/W    R/W     R/W       X
+        *     groupMemoryBarrier()
+        *   memoryBarrier()       -           R/W    R/W
+        *     (but only images declared coherent?)
+        *   memoryBarrierAtomic() -                  R/W
+        *   memoryBarrierBuffer() -                          R/W
+        *   memoryBarrierImage()  -           R/W
+        *   memoryBarrierShared() -   R/W
+        *
+        * TODO I think for SSBO/image/shared, in cases where we can determine
+        * which variable is accessed, we don't need to care about accesses to
+        * different variables (unless declared coherent??)
+        */
+       enum {
+               IR3_BARRIER_EVERYTHING = 1 << 0,
+               IR3_BARRIER_SHARED_R   = 1 << 1,
+               IR3_BARRIER_SHARED_W   = 1 << 2,
+               IR3_BARRIER_IMAGE_R    = 1 << 3,
+               IR3_BARRIER_IMAGE_W    = 1 << 4,
+               IR3_BARRIER_BUFFER_R   = 1 << 5,
+               IR3_BARRIER_BUFFER_W   = 1 << 6,
+               IR3_BARRIER_ARRAY_R    = 1 << 7,
+               IR3_BARRIER_ARRAY_W    = 1 << 8,
+       } barrier_class, barrier_conflict;
+
        /* Entry in ir3_block's instruction list: */
        struct list_head node;
 
@@ -417,16 +451,13 @@ struct ir3_array {
 
        nir_register *r;
 
-       /* We track the last write and last access (read or write) to
-        * setup dependencies on instructions that read or write the
-        * array.  Reads can be re-ordered wrt. other reads, but should
-        * not be re-ordered wrt. to writes.  Writes cannot be reordered
-        * wrt. any other access to the array.
-        *
-        * So array reads depend on last write, and array writes depend
-        * on the last access.
+       /* To avoid array writes from getting DCE'd, keep track of the
+        * most recent write.  Any array access depends on the most
+        * recent write.  This way, nothing depends on writes after the
+        * last read, but all the writes that happen before that have
+        * something depending on them.
         */
-       struct ir3_instruction *last_write, *last_access;
+       struct ir3_instruction *last_write;
 
        /* extra stuff used in RA pass: */
        unsigned base;      /* base vreg name */
@@ -493,6 +524,7 @@ struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc);
 struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
                opc_t opc, int nreg);
 struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
+void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep);
 const char *ir3_instr_name(struct ir3_instruction *instr);
 
 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
@@ -907,25 +939,36 @@ static inline unsigned ir3_cat3_absneg(opc_t opc)
 
 static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
 {
+       unsigned cnt = instr->regs_count + instr->deps_count;
        if (instr->address)
-               return instr->regs_count + 1;
-       return instr->regs_count;
+               cnt++;
+       return cnt;
 }
 
 static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
 {
-       if (n == (instr->regs_count + 0))
+       if (n == (instr->regs_count + instr->deps_count))
                return instr->address;
+       if (n >= instr->regs_count)
+               return instr->deps[n - instr->regs_count];
        return ssa(instr->regs[n]);
 }
 
+static inline bool __is_false_dep(struct ir3_instruction *instr, unsigned n)
+{
+       if (n == (instr->regs_count + instr->deps_count))
+               return false;
+       if (n >= instr->regs_count)
+               return true;
+       return false;
+}
+
 #define __src_cnt(__instr) ((__instr)->address ? (__instr)->regs_count : (__instr)->regs_count - 1)
 
 /* iterator for an instruction's SSA sources (instr), also returns src #: */
 #define foreach_ssa_src_n(__srcinst, __n, __instr) \
-       if ((__instr)->regs_count) \
-               for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \
-                       if ((__srcinst = __ssa_src_n(__instr, __n)))
+       for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \
+               if ((__srcinst = __ssa_src_n(__instr, __n)))
 
 /* iterator for an instruction's SSA sources (instr): */
 #define foreach_ssa_src(__srcinst, __instr) \
@@ -950,6 +993,7 @@ void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);
 void ir3_group(struct ir3 *ir);
 
 /* scheduling: */
+void ir3_sched_add_deps(struct ir3 *ir);
 int ir3_sched(struct ir3 *ir);
 
 /* register assignment: */
index bd3e0d0cd4a224c0b53ea0caab57c9925e94f4ea..3fd2e50d82f838f9c02415b38a07847b620126b1 100644 (file)
@@ -74,20 +74,6 @@ struct ir3_context {
        /* Compute shader inputs: */
        struct ir3_instruction *local_invocation_id, *work_group_id;
 
-       /* For SSBO's and atomics, we need to preserve order, such
-        * that reads don't overtake writes, and the order of writes
-        * is preserved.  Atomics are considered as a write.
-        *
-        * To do this, we track last write and last access, in a
-        * similar way to ir3_array.  But since we don't know whether
-        * the same SSBO is bound to multiple slots, so we simply
-        * track this globally rather than per-SSBO.
-        *
-        * TODO should we track this per block instead?  I guess it
-        * shouldn't matter much?
-        */
-       struct ir3_instruction *last_write, *last_access;
-
        /* mapping from nir_register to defining instruction: */
        struct hash_table *def_ht;
 
@@ -345,6 +331,8 @@ create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
        mov = ir3_instr_create(block, OPC_MOV);
        mov->cat1.src_type = TYPE_U32;
        mov->cat1.dst_type = TYPE_U32;
+       mov->barrier_class = IR3_BARRIER_ARRAY_R;
+       mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
        ir3_reg_create(mov, 0, 0);
        src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
                        COND(address, IR3_REG_RELATIV));
@@ -356,8 +344,6 @@ create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
        if (address)
                ir3_instr_set_address(mov, address);
 
-       arr->last_access = mov;
-
        return mov;
 }
 
@@ -373,9 +359,11 @@ create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
        mov = ir3_instr_create(block, OPC_MOV);
        mov->cat1.src_type = TYPE_U32;
        mov->cat1.dst_type = TYPE_U32;
+       mov->barrier_class = IR3_BARRIER_ARRAY_W;
+       mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
        dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
                        COND(address, IR3_REG_RELATIV));
-       dst->instr = arr->last_access;
+       dst->instr = arr->last_write;
        dst->size  = arr->length;
        dst->array.id = arr->id;
        dst->array.offset = n;
@@ -384,7 +372,7 @@ create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
        if (address)
                ir3_instr_set_address(mov, address);
 
-       arr->last_write = arr->last_access = mov;
+       arr->last_write = mov;
 
        return mov;
 }
@@ -1236,22 +1224,6 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
        }
 }
 
-static void
-mark_read(struct ir3_context *ctx, struct ir3_instruction *instr)
-{
-       instr->regs[0]->instr = ctx->last_write;
-       instr->regs[0]->flags |= IR3_REG_SSA;
-       ctx->last_access = instr;
-}
-
-static void
-mark_write(struct ir3_context *ctx, struct ir3_instruction *instr)
-{
-       instr->regs[0]->instr = ctx->last_access;
-       instr->regs[0]->flags |= IR3_REG_SSA;
-       ctx->last_write = ctx->last_access = instr;
-}
-
 /* src[] = { buffer_index, offset }. No const_index */
 static void
 emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
@@ -1280,7 +1252,8 @@ emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
        ldgb->cat6.iim_val = intr->num_components;
        ldgb->cat6.d = 4;
        ldgb->cat6.type = TYPE_U32;
-       mark_read(ctx, ldgb);
+       ldgb->barrier_class = IR3_BARRIER_BUFFER_R;
+       ldgb->barrier_conflict = IR3_BARRIER_BUFFER_W;
 
        split_dest(b, dst, ldgb, 0, intr->num_components);
 }
@@ -1320,7 +1293,8 @@ emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        stgb->cat6.iim_val = ncomp;
        stgb->cat6.d = 4;
        stgb->cat6.type = TYPE_U32;
-       mark_write(ctx, stgb);
+       stgb->barrier_class = IR3_BARRIER_BUFFER_W;
+       stgb->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
 
        array_insert(b, b->keeps, stgb);
 }
@@ -1430,7 +1404,8 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        atomic->cat6.iim_val = 1;
        atomic->cat6.d = 4;
        atomic->cat6.type = type;
-       mark_write(ctx, atomic);
+       atomic->barrier_class = IR3_BARRIER_BUFFER_W;
+       atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
 
        /* even if nothing consume the result, we can't DCE the instruction: */
        array_insert(b, b->keeps, atomic);
@@ -1455,7 +1430,8 @@ emit_intrinsic_load_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr,
        ldl->cat6.type = TYPE_U32;
        ldl->regs[0]->wrmask = MASK(intr->num_components);
 
-       mark_read(ctx, ldl);
+       ldl->barrier_class = IR3_BARRIER_SHARED_R;
+       ldl->barrier_conflict = IR3_BARRIER_SHARED_W;
 
        split_dest(b, dst, ldl, 0, intr->num_components);
 }
@@ -1491,8 +1467,9 @@ emit_intrinsic_store_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr)
                        create_immed(b, length), 0);
                stl->cat6.dst_offset = first_component + base;
                stl->cat6.type = TYPE_U32;
+               stl->barrier_class = IR3_BARRIER_SHARED_W;
+               stl->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W;
 
-               mark_write(ctx, stl);
                array_insert(b, b->keeps, stl);
 
                /* Clear the bits in the writemask that we just wrote, then try
@@ -1573,7 +1550,8 @@ emit_intrinsic_atomic_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        atomic->cat6.iim_val = 1;
        atomic->cat6.d = 1;
        atomic->cat6.type = type;
-       mark_write(ctx, atomic);
+       atomic->barrier_class = IR3_BARRIER_SHARED_W;
+       atomic->barrier_conflict = IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W;
 
        /* even if nothing consume the result, we can't DCE the instruction: */
        array_insert(b, b->keeps, atomic);
@@ -1702,6 +1680,9 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
        sam = ir3_SAM(b, OPC_ISAM, type, TGSI_WRITEMASK_XYZW, flags,
                        tex_idx, tex_idx, create_collect(b, coords, ncoords), NULL);
 
+       sam->barrier_class = IR3_BARRIER_IMAGE_R;
+       sam->barrier_conflict = IR3_BARRIER_IMAGE_W;
+
        split_dest(b, dst, sam, 0, 4);
 }
 
@@ -1737,7 +1718,8 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        stib->cat6.d = ncoords;
        stib->cat6.type = get_image_type(var);
        stib->cat6.typed = true;
-       mark_write(ctx, stib);
+       stib->barrier_class = IR3_BARRIER_IMAGE_W;
+       stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
 
        array_insert(b, b->keeps, stib);
 }
@@ -1821,7 +1803,8 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
        atomic->cat6.d = ncoords;
        atomic->cat6.type = get_image_type(var);
        atomic->cat6.typed = true;
-       mark_write(ctx, atomic);
+       atomic->barrier_class = IR3_BARRIER_IMAGE_W;
+       atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
 
        /* even if nothing consume the result, we can't DCE the instruction: */
        array_insert(b, b->keeps, atomic);
@@ -1841,23 +1824,62 @@ emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr)
                barrier->cat7.g = true;
                barrier->cat7.l = true;
                barrier->flags = IR3_INSTR_SS | IR3_INSTR_SY;
+               barrier->barrier_class = IR3_BARRIER_EVERYTHING;
                break;
        case nir_intrinsic_memory_barrier:
+               barrier = ir3_FENCE(b);
+               barrier->cat7.g = true;
+               barrier->cat7.r = true;
+               barrier->cat7.w = true;
+               barrier->barrier_class = IR3_BARRIER_IMAGE_W |
+                               IR3_BARRIER_BUFFER_W;
+               barrier->barrier_conflict =
+                               IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W |
+                               IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
+               break;
        case nir_intrinsic_memory_barrier_atomic_counter:
        case nir_intrinsic_memory_barrier_buffer:
                barrier = ir3_FENCE(b);
                barrier->cat7.g = true;
                barrier->cat7.r = true;
                barrier->cat7.w = true;
+               barrier->barrier_class = IR3_BARRIER_BUFFER_W;
+               barrier->barrier_conflict = IR3_BARRIER_BUFFER_R |
+                               IR3_BARRIER_BUFFER_W;
                break;
-       case nir_intrinsic_group_memory_barrier:
        case nir_intrinsic_memory_barrier_image:
+               // TODO double check if this should have .g set
+               barrier = ir3_FENCE(b);
+               barrier->cat7.g = true;
+               barrier->cat7.r = true;
+               barrier->cat7.w = true;
+               barrier->barrier_class = IR3_BARRIER_IMAGE_W;
+               barrier->barrier_conflict = IR3_BARRIER_IMAGE_R |
+                               IR3_BARRIER_IMAGE_W;
+               break;
        case nir_intrinsic_memory_barrier_shared:
                barrier = ir3_FENCE(b);
                barrier->cat7.g = true;
                barrier->cat7.l = true;
                barrier->cat7.r = true;
                barrier->cat7.w = true;
+               barrier->barrier_class = IR3_BARRIER_SHARED_W;
+               barrier->barrier_conflict = IR3_BARRIER_SHARED_R |
+                               IR3_BARRIER_SHARED_W;
+               break;
+       case nir_intrinsic_group_memory_barrier:
+               barrier = ir3_FENCE(b);
+               barrier->cat7.g = true;
+               barrier->cat7.l = true;
+               barrier->cat7.r = true;
+               barrier->cat7.w = true;
+               barrier->barrier_class = IR3_BARRIER_SHARED_W |
+                               IR3_BARRIER_IMAGE_W |
+                               IR3_BARRIER_BUFFER_W;
+               barrier->barrier_conflict =
+                               IR3_BARRIER_SHARED_R | IR3_BARRIER_SHARED_W |
+                               IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W |
+                               IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
                break;
        default:
                unreachable("boo");
@@ -3301,6 +3323,8 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
                ir3_print(ir);
        }
 
+       ir3_sched_add_deps(ir);
+
        /* Group left/right neighbors, inserting mov's where needed to
         * solve conflicts:
         */
index be39027b6a0b79fe895bf6473ae271e9a992683d..55ca5333b4776e78ff6a21cdc260b5c74ad21fe5 100644 (file)
 int ir3_delayslots(struct ir3_instruction *assigner,
                struct ir3_instruction *consumer, unsigned n)
 {
+       /* don't count false-dependencies: */
+       if (__is_false_dep(consumer, n))
+               return 0;
+
        /* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal
         * alu -> alu needs 3 cycles, cat4 -> alu and texture fetch
         * handled with sync bits
index b56da304f9289e2350ed156b8717262b1e210c5c..9492e9ba650156b088c3c3e77ac9624eebbf701c 100644 (file)
@@ -669,3 +669,94 @@ int ir3_sched(struct ir3 *ir)
                return -1;
        return 0;
 }
+
+/* does instruction 'prior' need to be scheduled before 'instr'? */
+static bool
+depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior)
+{
+       /* TODO for dependencies that are related to a specific object, ie
+        * a specific SSBO/image/array, we could relax this constraint to
+        * make accesses to unrelated objects not depend on each other (at
+        * least as long as not declared coherent)
+        */
+       if ((instr->barrier_class & IR3_BARRIER_EVERYTHING) ||
+                       (prior->barrier_class & IR3_BARRIER_EVERYTHING))
+               return true;
+       return !!(instr->barrier_class & prior->barrier_conflict);
+}
+
+static void
+add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr)
+{
+       struct list_head *prev = instr->node.prev;
+       struct list_head *next = instr->node.next;
+
+       /* add dependencies on previous instructions that must be scheduled
+        * prior to the current instruction
+        */
+       while (prev != &block->instr_list) {
+               struct ir3_instruction *pi =
+                       LIST_ENTRY(struct ir3_instruction, prev, node);
+
+               prev = prev->prev;
+
+               if (is_meta(pi))
+                       continue;
+
+               if (instr->barrier_class == pi->barrier_class) {
+                       ir3_instr_add_dep(instr, pi);
+                       break;
+               }
+
+               if (depends_on(instr, pi))
+                       ir3_instr_add_dep(instr, pi);
+       }
+
+       /* add dependencies on this instruction to following instructions
+        * that must be scheduled after the current instruction:
+        */
+       while (next != &block->instr_list) {
+               struct ir3_instruction *ni =
+                       LIST_ENTRY(struct ir3_instruction, next, node);
+
+               next = next->next;
+
+               if (is_meta(ni))
+                       continue;
+
+               if (instr->barrier_class == ni->barrier_class) {
+                       ir3_instr_add_dep(ni, instr);
+                       break;
+               }
+
+               if (depends_on(ni, instr))
+                       ir3_instr_add_dep(ni, instr);
+       }
+}
+
+/* before scheduling a block, we need to add any necessary false-dependencies
+ * to ensure that:
+ *
+ *  (1) barriers are scheduled in the right order wrt instructions related
+ *      to the barrier
+ *
+ *  (2) reads that come before a write actually get scheduled before the
+ *      write
+ */
+static void
+calculate_deps(struct ir3_block *block)
+{
+       list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
+               if (instr->barrier_class) {
+                       add_barrier_deps(block, instr);
+               }
+       }
+}
+
+void
+ir3_sched_add_deps(struct ir3 *ir)
+{
+       list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+               calculate_deps(block);
+       }
+}