util: rename list_empty() to list_is_empty()
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 92252b03218e60d06ebb61bab3362946a7d8a522..86b87837f8ed4423b297409052aeb09f0631a410 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -192,10 +192,11 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
          * need/can to do things slightly different, like not loading the
          * amount to add/sub, as that is implicit.
          */
-        bool atomic_add_replaced = ((instr->intrinsic == nir_intrinsic_ssbo_atomic_add ||
-                                     instr->intrinsic == nir_intrinsic_shared_atomic_add) &&
-                                    (tmu_op == V3D_TMU_OP_WRITE_AND_READ_INC ||
-                                     tmu_op == V3D_TMU_OP_WRITE_OR_READ_DEC));
+        bool atomic_add_replaced =
+                ((instr->intrinsic == nir_intrinsic_ssbo_atomic_add ||
+                  instr->intrinsic == nir_intrinsic_shared_atomic_add) &&
+                 (tmu_op == V3D_TMU_OP_WRITE_AND_READ_INC ||
+                  tmu_op == V3D_TMU_OP_WRITE_OR_READ_DEC));
 
         bool is_store = (instr->intrinsic == nir_intrinsic_store_ssbo ||
                          instr->intrinsic == nir_intrinsic_store_scratch ||
@@ -207,6 +208,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                         instr->intrinsic == nir_intrinsic_load_scratch ||
                         instr->intrinsic == nir_intrinsic_load_shared);
 
+        if (!is_load)
+                c->tmu_dirty_rcl = true;
+
         bool has_index = !is_shared_or_scratch;
 
         int offset_src;
@@ -240,8 +244,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                 /* Note that QUNIFORM_UBO_ADDR takes a UBO index shifted up by
                  * 1 (0 is gallium's constant buffer 0).
                  */
-                base_offset = vir_uniform(c, QUNIFORM_UBO_ADDR,
-                                          v3d_unit_data_create(index, const_offset));
+                base_offset =
+                        vir_uniform(c, QUNIFORM_UBO_ADDR,
+                                    v3d_unit_data_create(index, const_offset));
                 const_offset = 0;
         } else if (is_shared_or_scratch) {
                 /* Shared and scratch variables have no buffer index, and all
@@ -261,6 +266,7 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                                                                       1 : 0]));
         }
 
+        struct qreg tmud = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD);
         unsigned writemask = is_store ? nir_intrinsic_write_mask(instr) : 0;
         uint32_t base_const_offset = const_offset;
         int first_component = -1;
@@ -301,29 +307,28 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                                 BITFIELD_RANGE(first_component, tmu_writes - 1);
                         writemask &= ~written_mask;
                 } else if (!is_load && !atomic_add_replaced) {
-                        vir_MOV_dest(c,
-                                     vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD),
-                                     ntq_get_src(c, instr->src[1 + has_index], 0));
+                        struct qreg data =
+                                ntq_get_src(c, instr->src[1 + has_index], 0);
+                        vir_MOV_dest(c, tmud, data);
                         tmu_writes++;
                         if (tmu_op == V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH) {
-                                vir_MOV_dest(c,
-                                             vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD),
-                                             ntq_get_src(c, instr->src[2 + has_index],
-                                                         0));
+                                data = ntq_get_src(c, instr->src[2 + has_index],
+                                                   0);
+                                vir_MOV_dest(c, tmud, data);
                                 tmu_writes++;
                         }
                 }
 
-                /* Make sure we won't exceed the 16-entry TMU fifo if each thread is
-                 * storing at the same time.
+                /* Make sure we won't exceed the 16-entry TMU fifo if each
+                 * thread is storing at the same time.
                  */
                 while (tmu_writes > 16 / c->threads)
                         c->threads /= 2;
 
-                /* The spec says that for atomics, the TYPE field is ignored, but that
-                 * doesn't seem to be the case for CMPXCHG.  Just use the number of
-                 * tmud writes we did to decide the type (or choose "32bit" for atomic
-                 * reads, which has been fine).
+                /* The spec says that for atomics, the TYPE field is ignored,
+                 * but that doesn't seem to be the case for CMPXCHG.  Just use
+                 * the number of tmud writes we did to decide the type (or
+                 * choose "32bit" for atomic reads, which has been fine).
                  */
                 uint32_t num_components;
                 if (is_load || atomic_add_replaced) {
@@ -339,7 +344,8 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                 if (num_components == 1) {
                         config |= GENERAL_TMU_LOOKUP_TYPE_32BIT_UI;
                 } else {
-                        config |= GENERAL_TMU_LOOKUP_TYPE_VEC2 + num_components - 2;
+                        config |= GENERAL_TMU_LOOKUP_TYPE_VEC2 +
+                                  num_components - 2;
                 }
 
                 if (vir_in_nonuniform_control_flow(c)) {
@@ -360,8 +366,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                                 offset = vir_ADD(c, offset,
                                                  vir_uniform_ui(c, const_offset));
                         }
-                        tmu = vir_ADD_dest(c, tmua, offset,
-                                           ntq_get_src(c, instr->src[offset_src], 0));
+                        struct qreg data =
+                                ntq_get_src(c, instr->src[offset_src], 0);
+                        tmu = vir_ADD_dest(c, tmua, offset, data);
                 } else {
                         if (const_offset != 0) {
                                 tmu = vir_ADD_dest(c, tmua, base_offset,
@@ -372,8 +379,9 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                 }
 
                 if (config != ~0) {
-                        tmu->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT,
-                                                             config);
+                        tmu->uniform =
+                                vir_get_uniform_index(c, QUNIFORM_CONSTANT,
+                                                      config);
                 }
 
                 if (vir_in_nonuniform_control_flow(c))
@@ -382,8 +390,10 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                 vir_emit_thrsw(c);
 
                 /* Read the result, or wait for the TMU op to complete. */
-                for (int i = 0; i < nir_intrinsic_dest_components(instr); i++)
-                        ntq_store_dest(c, &instr->dest, i, vir_MOV(c, vir_LDTMU(c)));
+                for (int i = 0; i < nir_intrinsic_dest_components(instr); i++) {
+                        ntq_store_dest(c, &instr->dest, i,
+                                       vir_MOV(c, vir_LDTMU(c)));
+                }
 
                 if (nir_intrinsic_dest_components(instr) == 0)
                         vir_TMUWT(c);
@@ -419,7 +429,7 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,
                struct qreg result)
 {
         struct qinst *last_inst = NULL;
-        if (!list_empty(&c->cur_block->instructions))
+        if (!list_is_empty(&c->cur_block->instructions))
                 last_inst = (struct qinst *)c->cur_block->instructions.prev;
 
         assert((result.file == QFILE_TEMP &&
@@ -1375,7 +1385,7 @@ v3d_optimize_nir(struct nir_shader *s)
                 progress = false;
 
                 NIR_PASS_V(s, nir_lower_vars_to_ssa);
-                NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL);
+                NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
                 NIR_PASS(progress, s, nir_lower_phis_to_scalar);
                 NIR_PASS(progress, s, nir_copy_prop);
                 NIR_PASS(progress, s, nir_opt_remove_phis);
@@ -1973,8 +1983,10 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
-        case nir_intrinsic_image_deref_atomic_min:
-        case nir_intrinsic_image_deref_atomic_max:
+        case nir_intrinsic_image_deref_atomic_imin:
+        case nir_intrinsic_image_deref_atomic_umin:
+        case nir_intrinsic_image_deref_atomic_imax:
+        case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
@@ -2659,6 +2671,7 @@ const nir_shader_compiler_options v3d_nir_options = {
         .lower_mul_high = true,
         .lower_wpos_pntc = true,
         .lower_rotate = true,
+        .lower_to_scalar = true,
 };
 
 /**
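For reference, the predicate this commit renames callers to (see the ntq_store_dest hunk above) is the intrusive-list emptiness check from Mesa's util/list.h. A minimal sketch of the idea, assuming the usual sentinel-based circular list; the exact upstream header may differ in details:

        /* Sketch: an intrusive circular list is empty when its sentinel
         * head still points back to itself.
         */
        static inline bool
        list_is_empty(const struct list_head *list)
        {
                return list->next == list;
        }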