instr->intrinsic == nir_intrinsic_shared_atomic_add) &&
(tmu_op == V3D_TMU_OP_WRITE_AND_READ_INC ||
tmu_op == V3D_TMU_OP_WRITE_OR_READ_DEC));
+
bool is_store = (instr->intrinsic == nir_intrinsic_store_ssbo ||
instr->intrinsic == nir_intrinsic_store_scratch ||
instr->intrinsic == nir_intrinsic_store_shared);
+
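+ /* Loads only read through the TMU: they write an address to TMUA but
+  * no data to TMUD, so they contribute a single TMU write each.
+  */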
+ bool is_load = (instr->intrinsic == nir_intrinsic_load_uniform ||
+ instr->intrinsic == nir_intrinsic_load_ubo ||
+ instr->intrinsic == nir_intrinsic_load_ssbo ||
+ instr->intrinsic == nir_intrinsic_load_scratch ||
+ instr->intrinsic == nir_intrinsic_load_shared);
+
bool has_index = !is_shared_or_scratch;
int offset_src;
- int tmu_writes = 1; /* address */
if (instr->intrinsic == nir_intrinsic_load_uniform) {
offset_src = 0;
} else if (instr->intrinsic == nir_intrinsic_load_ssbo ||
           instr->intrinsic == nir_intrinsic_load_ubo ||
           instr->intrinsic == nir_intrinsic_load_scratch ||
           instr->intrinsic == nir_intrinsic_load_shared ||
           atomic_add_replaced) {
        offset_src = 0 + has_index;
} else if (is_store) {
offset_src = 1 + has_index;
- for (int i = 0; i < instr->num_components; i++) {
- vir_MOV_dest(c,
- vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD),
- ntq_get_src(c, instr->src[0], i));
- tmu_writes++;
- }
} else {
offset_src = 0 + has_index;
- vir_MOV_dest(c,
- vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD),
- ntq_get_src(c, instr->src[1 + has_index], 0));
- tmu_writes++;
- if (tmu_op == V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH) {
- vir_MOV_dest(c,
- vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD),
- ntq_get_src(c, instr->src[2 + has_index],
- 0));
- tmu_writes++;
- }
}
bool dynamic_src = !nir_src_is_const(instr->src[offset_src]);
uint32_t const_offset = 0;
if (!dynamic_src)
        const_offset = nir_src_as_uint(instr->src[offset_src]);
- /* Make sure we won't exceed the 16-entry TMU fifo if each thread is
- * storing at the same time.
- */
- while (tmu_writes > 16 / c->threads)
- c->threads /= 2;
-
/* Factor out the direct uniform offset from the indirect offset. */
struct qreg offset;
if (instr->intrinsic == nir_intrinsic_load_uniform) {
        const_offset += nir_intrinsic_base(instr);
        offset = vir_uniform(c, QUNIFORM_UBO_ADDR, 0);
} else if (instr->intrinsic == nir_intrinsic_load_ubo) {
        /* QUNIFORM_UBO_ADDR takes a UBO index shifted up by 1 (0 is
         * gallium's constant buffer 0).
         */
        uint32_t index = nir_src_as_uint(instr->src[0]) + 1;
        offset = vir_uniform(c, QUNIFORM_UBO_ADDR, index);
} else if (is_shared_or_scratch) {
        /* Shared and scratch variables have no buffer index, and all
         * start from a common base that we set up at the start of
         * dispatch.
         */
        if (instr->intrinsic == nir_intrinsic_load_scratch ||
            instr->intrinsic == nir_intrinsic_store_scratch) {
                offset = c->spill_base;
        } else {
                offset = c->cs_shared_offset;
                const_offset += nir_intrinsic_base(instr);
        }
} else {
        offset = vir_uniform(c, QUNIFORM_SSBO_OFFSET,
                             nir_src_as_uint(instr->src[is_store ?
                                                        1 : 0]));
}
+ int tmu_writes = 1; /* address */
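+ /* Write the TMUD data registers: every component for a store, or the
+  * data operand(s) for an atomic (adds already replaced by inc/dec
+  * carry no data).
+  */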
+ if (is_store) {
+ for (int i = 0; i < instr->num_components; i++) {
+ vir_MOV_dest(c,
+ vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD),
+ ntq_get_src(c, instr->src[0], i));
+ tmu_writes++;
+ }
+ } else if (!is_load && !atomic_add_replaced) {
+ vir_MOV_dest(c,
+ vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD),
+ ntq_get_src(c, instr->src[1 + has_index], 0));
+ tmu_writes++;
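+ /* cmpxchg takes two data operands (the compare value and the swap
+  * value), so it needs a second TMUD write.
+  */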
+ if (tmu_op == V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH) {
+ vir_MOV_dest(c,
+ vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD),
+ ntq_get_src(c, instr->src[2 + has_index],
+ 0));
+ tmu_writes++;
+ }
+ }
+
+ /* Make sure we won't exceed the 16-entry TMU fifo if each thread is
+ * storing at the same time.
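+ * For example, at 4 threads each thread only has 16 / 4 = 4 FIFO
+ * entries, so a vec4 store (1 address write + 4 data writes = 5)
+ * has to drop down to 2 threads.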
+ */
+ while (tmu_writes > 16 / c->threads)
+ c->threads /= 2;
+
/* The spec says that for atomics, the TYPE field is ignored, but that
* doesn't seem to be the case for CMPXCHG. Just use the number of
* tmud writes we did to decide the type (or choose "32bit" for atomic