util: rename list_empty() to list_is_empty()
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 92252b03218e60d06ebb61bab3362946a7d8a522..86b87837f8ed4423b297409052aeb09f0631a410 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -192,10 +192,11 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
          * need/can to do things slightly different, like not loading the
          * amount to add/sub, as that is implicit.
          */
-        bool atomic_add_replaced = ((instr->intrinsic == nir_intrinsic_ssbo_atomic_add ||
-                                     instr->intrinsic == nir_intrinsic_shared_atomic_add) &&
-                                    (tmu_op == V3D_TMU_OP_WRITE_AND_READ_INC ||
-                                     tmu_op == V3D_TMU_OP_WRITE_OR_READ_DEC));
+        bool atomic_add_replaced =
+                ((instr->intrinsic == nir_intrinsic_ssbo_atomic_add ||
+                  instr->intrinsic == nir_intrinsic_shared_atomic_add) &&
+                 (tmu_op == V3D_TMU_OP_WRITE_AND_READ_INC ||
+                  tmu_op == V3D_TMU_OP_WRITE_OR_READ_DEC));
 
         bool is_store = (instr->intrinsic == nir_intrinsic_store_ssbo ||
                          instr->intrinsic == nir_intrinsic_store_scratch ||
@@ -207,6 +208,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                         instr->intrinsic == nir_intrinsic_load_scratch ||
                         instr->intrinsic == nir_intrinsic_load_shared);
 
+        if (!is_load)
+                c->tmu_dirty_rcl = true;
+
         bool has_index = !is_shared_or_scratch;
 
         int offset_src;
@@ -240,8 +244,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                 /* Note that QUNIFORM_UBO_ADDR takes a UBO index shifted up by
                  * 1 (0 is gallium's constant buffer 0).
                  */
-                base_offset = vir_uniform(c, QUNIFORM_UBO_ADDR,
-                                          v3d_unit_data_create(index, const_offset));
+                base_offset =
+                        vir_uniform(c, QUNIFORM_UBO_ADDR,
+                                    v3d_unit_data_create(index, const_offset));
                 const_offset = 0;
         } else if (is_shared_or_scratch) {
                 /* Shared and scratch variables have no buffer index, and all
@@ -261,6 +266,7 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                                                                       1 : 0]));
         }
 
+        struct qreg tmud = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD);
         unsigned writemask = is_store ? nir_intrinsic_write_mask(instr) : 0;
         uint32_t base_const_offset = const_offset;
         int first_component = -1;
@@ -301,29 +307,28 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                                 BITFIELD_RANGE(first_component, tmu_writes - 1);
                         writemask &= ~written_mask;
                 } else if (!is_load && !atomic_add_replaced) {
-                        vir_MOV_dest(c,
-                                     vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD),
-                                     ntq_get_src(c, instr->src[1 + has_index], 0));
+                        struct qreg data =
+                                ntq_get_src(c, instr->src[1 + has_index], 0);
+                        vir_MOV_dest(c, tmud, data);
                         tmu_writes++;
                         if (tmu_op == V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH) {
-                                vir_MOV_dest(c,
-                                             vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD),
-                                             ntq_get_src(c, instr->src[2 + has_index],
-                                                         0));
+                                data = ntq_get_src(c, instr->src[2 + has_index],
+                                                   0);
+                                vir_MOV_dest(c, tmud, data);
                                 tmu_writes++;
                         }
                 }
 
-                /* Make sure we won't exceed the 16-entry TMU fifo if each thread is
-                 * storing at the same time.
+                /* Make sure we won't exceed the 16-entry TMU fifo if each
+                 * thread is storing at the same time.
                  */
                 while (tmu_writes > 16 / c->threads)
                         c->threads /= 2;
 
-                /* The spec says that for atomics, the TYPE field is ignored, but that
-                 * doesn't seem to be the case for CMPXCHG.  Just use the number of
-                 * tmud writes we did to decide the type (or choose "32bit" for atomic
-                 * reads, which has been fine).
+                /* The spec says that for atomics, the TYPE field is ignored,
+                 * but that doesn't seem to be the case for CMPXCHG.  Just use
+                 * the number of tmud writes we did to decide the type (or
+                 * choose "32bit" for atomic reads, which has been fine).
                  */
                 uint32_t num_components;
                 if (is_load || atomic_add_replaced) {
@@ -339,7 +344,8 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                 if (num_components == 1) {
                         config |= GENERAL_TMU_LOOKUP_TYPE_32BIT_UI;
                 } else {
-                        config |= GENERAL_TMU_LOOKUP_TYPE_VEC2 + num_components - 2;
+                        config |= GENERAL_TMU_LOOKUP_TYPE_VEC2 +
+                                  num_components - 2;
                 }
 
                 if (vir_in_nonuniform_control_flow(c)) {
@@ -360,8 +366,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                                 offset = vir_ADD(c, offset,
                                                  vir_uniform_ui(c, const_offset));
                         }
-                        tmu = vir_ADD_dest(c, tmua, offset,
-                                           ntq_get_src(c, instr->src[offset_src], 0));
+                        struct qreg data =
+                                ntq_get_src(c, instr->src[offset_src], 0);
+                        tmu = vir_ADD_dest(c, tmua, offset, data);
                 } else {
                         if (const_offset != 0) {
                                 tmu = vir_ADD_dest(c, tmua, base_offset,
@@ -372,8 +379,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                 }
 
                 if (config != ~0) {
-                        tmu->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT,
-                                                             config);
+                        tmu->uniform =
+                                vir_get_uniform_index(c, QUNIFORM_CONSTANT,
+                                                      config);
                 }
 
                 if (vir_in_nonuniform_control_flow(c))
@@ -382,8 +390,10 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                 vir_emit_thrsw(c);
 
                 /* Read the result, or wait for the TMU op to complete. */
-                for (int i = 0; i < nir_intrinsic_dest_components(instr); i++)
-                        ntq_store_dest(c, &instr->dest, i, vir_MOV(c, vir_LDTMU(c)));
+                for (int i = 0; i < nir_intrinsic_dest_components(instr); i++) {
+                        ntq_store_dest(c, &instr->dest, i,
+                                       vir_MOV(c, vir_LDTMU(c)));
+                }
 
                 if (nir_intrinsic_dest_components(instr) == 0)
                         vir_TMUWT(c);
@@ -419,7 +429,7 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,
                struct qreg result)
 {
         struct qinst *last_inst = NULL;
-        if (!list_empty(&c->cur_block->instructions))
+        if (!list_is_empty(&c->cur_block->instructions))
                 last_inst = (struct qinst *)c->cur_block->instructions.prev;
 
         assert((result.file == QFILE_TEMP &&
@@ -1375,7 +1385,7 @@ v3d_optimize_nir(struct nir_shader *s)
                 progress = false;
 
                 NIR_PASS_V(s, nir_lower_vars_to_ssa);
-                NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL);
+                NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
                 NIR_PASS(progress, s, nir_lower_phis_to_scalar);
                 NIR_PASS(progress, s, nir_copy_prop);
                 NIR_PASS(progress, s, nir_opt_remove_phis);
@@ -1973,8 +1983,10 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
-        case nir_intrinsic_image_deref_atomic_min:
-        case nir_intrinsic_image_deref_atomic_max:
+        case nir_intrinsic_image_deref_atomic_imin:
+        case nir_intrinsic_image_deref_atomic_umin:
+        case nir_intrinsic_image_deref_atomic_imax:
+        case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
@@ -2659,6 +2671,7 @@ const nir_shader_compiler_options v3d_nir_options = {
         .lower_mul_high = true,
         .lower_wpos_pntc = true,
         .lower_rotate = true,
+        .lower_to_scalar = true,
 };
 
 /**
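For reference, the predicate this commit renames callers to (see the ntq_store_dest hunk above) is the intrusive-list emptiness check from Mesa's util/list.h. A minimal sketch of the idea, assuming the usual sentinel-based circular list; the exact upstream header may differ in details:

        /* Sketch: an intrusive circular list is empty when its sentinel
         * head still points back to itself.
         */
        static inline bool
        list_is_empty(const struct list_head *list)
        {
                return list->next == list;
        }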