freedreno/ir3: Fix register allocation assertion failures.

author Eric Anholt <eric@anholt.net>
Tue, 21 Apr 2020 20:26:14 +0000 (13:26 -0700)
committer Marge Bot <eric+marge@anholt.net>
Fri, 1 May 2020 16:26:32 +0000 (16:26 +0000)
diff --git a/.gitlab-ci/deqp-freedreno-a307-fails.txt b/.gitlab-ci/deqp-freedreno-a307-fails.txt

index 6c835a85b6002dccf0de0cf75e1cc9ab4b9da4e1..7cf581dc204d84e9acfe3e94705b1d3024214047 100644 (file)
--- a/.gitlab-ci/deqp-freedreno-a307-fails.txt
+++ b/.gitlab-ci/deqp-freedreno-a307-fails.txt
@@ -608,14 +608,6 @@ dEQP-GLES3.functional.texture.format.sized.3d.rgb10_a2ui_pot
  dEQP-GLES3.functional.texture.format.sized.cube.rgb10_a2ui_npot
  dEQP-GLES3.functional.texture.format.sized.cube.rgb10_a2ui_pot
  dEQP-GLES3.functional.texture.mipmap.cube.max_level.linear_nearest
-dEQP-GLES3.functional.texture.shadow.2d.linear.less_or_equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.2d.linear_mipmap_nearest.equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.2d_array.linear.equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.2d_array.nearest.less_depth_component32f
-dEQP-GLES3.functional.texture.shadow.2d_array.nearest_mipmap_linear.less_or_equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.cube.linear_mipmap_linear.less_or_equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.cube.nearest.less_or_equal_depth_component32f
-dEQP-GLES3.functional.texture.shadow.cube.nearest_mipmap_nearest.equal_depth_component32f
  dEQP-GLES3.functional.texture.specification.basic_teximage2d.r8i_2d
  dEQP-GLES3.functional.texture.specification.basic_teximage2d.r8i_cube
  dEQP-GLES3.functional.texture.specification.basic_teximage2d.r8ui_2d
diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c

index 927f91e98ec467cca5c963449f1582933e6f22ac..231bfc6ffc1fcfe157271a48b1a028a6f4bdd69c 100644 (file)
--- a/src/freedreno/ir3/ir3_ra.c
+++ b/src/freedreno/ir3/ir3_ra.c
@@ -497,19 +497,6 @@ ra_select_reg_merged(unsigned int n, BITSET_WORD *regs, void *data)
                                 return reg;
                         }
                 }
-       } else if (is_tex_or_prefetch(instr)) {
-               /* we could have a tex fetch w/ wrmask .z, for example.. these
-                * cannot land in r0.x since that would underflow when we
-                * subtract the offset.  Ie. if we pick r0.z, and subtract
-                * the offset, the register encoded for dst will be r0.x
-                */
-               unsigned n = ffs(instr->regs[0]->wrmask);
-               debug_assert(n > 0);
-               unsigned offset = n - 1;
-               if (!half)
-                       offset *= 2;
-               base += offset;
-               max_target -= offset;
         }
  
         int r = pick_in_range(regs, base + start, base + max_target);
@@ -571,6 +558,12 @@ ra_init(struct ir3_ra_ctx *ctx)
         }
         ctx->alloc_count += ctx->class_alloc_count[total_class_count];
  
+       /* Add vreg names for r0.xyz */
+       ctx->r0_xyz_nodes = ctx->alloc_count;
+       ctx->alloc_count += 3;
+       ctx->hr0_xyz_nodes = ctx->alloc_count;
+       ctx->alloc_count += 3;
+
         ctx->g = ra_alloc_interference_graph(ctx->set->regs, ctx->alloc_count);
         ralloc_steal(ctx->g, ctx->instrd);
         ctx->def = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
@@ -710,6 +703,20 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block)
  
                         if ((instr->opc == OPC_META_INPUT) && first_non_input)
                                 use(name, first_non_input);
+
+                       /* Texture instructions with writemasks can be treated as smaller
+                        * vectors (or just scalars!) to allocate knowing that the
+                        * masked-out regs won't be written, but we need to make sure that
+                        * the start of the vector doesn't come before the first register
+                        * or we'll wrap.
+                        */
+                       if (is_tex_or_prefetch(instr)) {
+                               int writemask_skipped_regs = ffs(instr->regs[0]->wrmask) - 1;
+                               int r0_xyz = (instr->regs[0]->flags & IR3_REG_HALF) ?
+                                       ctx->hr0_xyz_nodes : ctx->r0_xyz_nodes;
+                               for (int i = 0; i < writemask_skipped_regs; i++)
+                                       ra_add_node_interference(ctx->g, name, r0_xyz + i);
+                       }
                 }
  
                 foreach_use (name, ctx, instr) {
@@ -1005,6 +1012,14 @@ ra_add_interference(struct ir3_ra_ctx *ctx)
                 arr->end_ip = 0;
         }
  
+
+       /* set up the r0.xyz precolor regs. */
+       for (int i = 0; i < 3; i++) {
+               ra_set_node_reg(ctx->g, ctx->r0_xyz_nodes + i, i);
+               ra_set_node_reg(ctx->g, ctx->hr0_xyz_nodes + i,
+                               ctx->set->first_half_reg + i);
+       }
+
         /* compute live ranges (use/def) on a block level, also updating
          * block's def/use bitmasks (used below to calculate per-block
          * livein/liveout):
diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h

index 45df2397ddc3e02a97072a65d8c13b95f482c33b..35fb618c49a0742dabe85bfa681026e0eb78d363 100644 (file)
--- a/src/freedreno/ir3/ir3_ra.h
+++ b/src/freedreno/ir3/ir3_ra.h
@@ -144,6 +144,8 @@ struct ir3_ra_ctx {
         bool scalar_pass;
  
         unsigned alloc_count;
+       unsigned r0_xyz_nodes; /* ra node numbers for r0.[xyz] precolors */
+       unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors pre-a6xx */
         /* one per class, plus one slot for arrays: */
         unsigned class_alloc_count[total_class_count + 1];
         unsigned class_base[total_class_count + 1];
author	Eric Anholt <eric@anholt.net>
	Tue, 21 Apr 2020 20:26:14 +0000 (13:26 -0700)
committer	Marge Bot <eric+marge@anholt.net>
	Fri, 1 May 2020 16:26:32 +0000 (16:26 +0000)
.gitlab-ci/deqp-freedreno-a307-fails.txt		patch | blob | history
src/freedreno/ir3/ir3_ra.c		patch | blob | history
src/freedreno/ir3/ir3_ra.h		patch | blob | history