* creating duplicate variants..
*/
- if (ir3_key_lowers_nir(&so->key)) {
- nir_shader *s = nir_shader_clone(ctx, so->shader->nir);
- ctx->s = ir3_optimize_nir(so->shader, s, &so->key);
- } else {
- /* fast-path for shader key that lowers nothing in NIR: */
- ctx->s = nir_shader_clone(ctx, so->shader->nir);
- }
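+ /* note: ir3_optimize_nir() now works in place on the clone (its
+ * result is no longer assigned), so a single nir_shader_clone()
+ * covers both paths:
+ */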
+ ctx->s = nir_shader_clone(ctx, so->shader->nir);
+ if (ir3_key_lowers_nir(&so->key))
+ ir3_optimize_nir(so->shader, ctx->s, &so->key);
/* we want to lower imul as late as possible, to also catch those
* generated by earlier lowering passes; so do this here instead of
* in ir3_optimize_nir().  However, we want a final swing of a few
* passes afterwards to have a chance at optimizing the result.
*/
- bool progress;
+ bool progress = false;
NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
if (progress) {
NIR_PASS_V(ctx->s, nir_opt_algebraic);
NIR_PASS_V(ctx->s, nir_opt_constant_folding);
}
- NIR_PASS_V(ctx->s, nir_convert_from_ssa, true);
+ /* The texture pre-fetch feature exists on a4xx onwards, but only
+ * enable it on generations where it has been tested:
+ */
+ if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gpu_id >= 600))
+ NIR_PASS_V(ctx->s, ir3_nir_lower_tex_prefetch);
- if (ir3_shader_debug & IR3_DBG_DISASM) {
- DBG("dump nir%dv%d: type=%d, k={cts=%u,hp=%u}",
- so->shader->id, so->id, so->type,
- so->key.color_two_side, so->key.half_precision);
- nir_print_shader(ctx->s, stdout);
- }
+ NIR_PASS_V(ctx->s, nir_convert_from_ssa, true);
if (shader_debug_enabled(so->type)) {
- fprintf(stderr, "NIR (final form) for %s shader:\n",
- _mesa_shader_stage_to_string(so->type));
- nir_print_shader(ctx->s, stderr);
+ fprintf(stdout, "NIR (final form) for %s shader %s:\n",
+ ir3_shader_stage(so), so->shader->nir->info.name);
+ nir_print_shader(ctx->s, stdout);
}
ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
for (unsigned i = 0; i < ctx->last_dst_n; i++) {
struct ir3_instruction *dst = ctx->last_dst[i];
dst->regs[0]->flags |= IR3_REG_HALF;
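+ /* for a split, the half flag also needs to propagate to the
+ * (vector) dst of the instruction being split:
+ */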
- if (ctx->last_dst[i]->opc == OPC_META_FO)
+ if (ctx->last_dst[i]->opc == OPC_META_SPLIT)
dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF;
}
}
ctx->last_dst_n = 0;
}
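+/* collect and split need the half/high flags of the dst to agree
+ * with those of each scalar element, so factor out fetching the
+ * relevant flags:
+ */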
+static unsigned
+dest_flags(struct ir3_instruction *instr)
+{
+ return instr->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH);
+}
+
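+/* A collect groups consecutive scalar values into a single virtual
+ * vecN src, ie. the counterpart of the split helper below:
+ */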
struct ir3_instruction *
ir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr,
unsigned arrsz)
if (arrsz == 0)
return NULL;
- unsigned flags = arr[0]->regs[0]->flags & IR3_REG_HALF;
+ unsigned flags = dest_flags(arr[0]);
- collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz);
- ir3_reg_create(collect, 0, flags); /* dst */
+ collect = ir3_instr_create2(block, OPC_META_COLLECT, 1 + arrsz);
+ __ssa_dst(collect)->flags |= flags;
for (unsigned i = 0; i < arrsz; i++) {
struct ir3_instruction *elem = arr[i];
elem = ir3_MOV(block, elem, type);
}
- compile_assert(ctx, (elem->regs[0]->flags & IR3_REG_HALF) == flags);
- ir3_reg_create(collect, 0, IR3_REG_SSA | flags)->instr = elem;
+ compile_assert(ctx, dest_flags(elem) == flags);
+ __ssa_src(collect, elem, flags);
}
collect->regs[0]->wrmask = MASK(arrsz);
}
/* helper for instructions that produce multiple consecutive scalar
- * outputs which need to have a split/fanout meta instruction inserted
+ * outputs which need to have a split meta instruction inserted
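+ * (eg. a sam instruction writing a vec4 dst gets four splits, one
+ * per scalar component that is consumed)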
*/
void
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
return;
}
- unsigned flags = src->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH);
+ unsigned flags = dest_flags(src);
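+ /* one split per scalar component; neighboring splits are linked
+ * via cp.left/cp.right so later passes can treat the components
+ * as a group:
+ */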
for (int i = 0, j = 0; i < n; i++) {
- struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO);
- ir3_reg_create(split, 0, IR3_REG_SSA | flags);
- ir3_reg_create(split, 0, IR3_REG_SSA | flags)->instr = src;
- split->fo.off = i + base;
+ struct ir3_instruction *split =
+ ir3_instr_create(block, OPC_META_SPLIT);
+ __ssa_dst(split)->flags |= flags;
+ __ssa_src(split, src, flags);
+ split->split.off = i + base;
if (prev) {
split->cp.left = prev;
instr = ir3_MOV(block, instr, TYPE_S16);
instr->regs[0]->num = regid(REG_A0, 0);
+ instr->regs[0]->flags &= ~IR3_REG_SSA;
instr->regs[0]->flags |= IR3_REG_HALF;
instr->regs[1]->flags |= IR3_REG_HALF;
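+ /* (the address is consumed as a half-precision (s16) value, hence
+ * the mov and flags above)
+ */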
/* condition always goes in predicate register: */
cond->regs[0]->num = regid(REG_P0, 0);
+ cond->regs[0]->flags &= ~IR3_REG_SSA;
return cond;
}
struct ir3_array *
ir3_get_array(struct ir3_context *ctx, nir_register *reg)
{
- list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
+ foreach_array (arr, &ctx->ir->array_list) {
if (arr->r == reg)
return arr;
}
mov->barrier_class = IR3_BARRIER_ARRAY_R;
mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
- ir3_reg_create(mov, 0, flags);
+ __ssa_dst(mov)->flags |= flags;
src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
COND(address, IR3_REG_RELATIV) | flags);
src->instr = arr->last_write;
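+ /* (the dependency on arr->last_write keeps this read ordered after
+ * the most recent write to the array)
+ */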
/* if not relative store, don't create an extra mov, since that
* ends up being difficult for cp to remove.
+ *
+ * Also, don't skip the mov if the src is meta (like fanout/split),
+ * since that creates a situation that RA can't really handle properly.
*/
- if (!address) {
+ if (!address && !is_meta(src)) {
dst = src->regs[0];
src->barrier_class |= IR3_BARRIER_ARRAY_W;