v3d: Fold comparisons for IF conditions into the flags for the IF.
authorEric Anholt <eric@anholt.net>
Sat, 29 Dec 2018 00:31:07 +0000 (16:31 -0800)
committerEric Anholt <eric@anholt.net>
Wed, 2 Jan 2019 22:12:29 +0000 (14:12 -0800)
total instructions in shared programs: 6193810 -> 6192844 (-0.02%)
instructions in affected programs: 800373 -> 799407 (-0.12%)

src/broadcom/compiler/nir_to_vir.c
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/vir.c
src/broadcom/qpu/qpu_instr.c
src/broadcom/qpu/qpu_instr.h

index e2cbae4174b20722fc9387eff67f96584738d28e..9d20853061252b57bb8430ab83fa96b484a9f5a7 100644 (file)
@@ -1736,19 +1736,33 @@ ntq_emit_if(struct v3d_compile *c, nir_if *if_stmt)
                 was_top_level = true;
         }
 
-        /* Set A for executing (execute == 0) and jumping (if->condition ==
-         * 0) channels, and then update execute flags for those to point to
-         * the ELSE block.
-         *
-         * XXX perf: we could reuse ntq_emit_comparison() to generate our if
-         * condition, and the .uf field to ignore non-executing channels, to
-         * reduce the overhead of if statements.
+        /* Set up the flags for the IF condition (taking the THEN branch). */
+        nir_alu_instr *if_condition_alu = ntq_get_alu_parent(if_stmt->condition);
+        enum v3d_qpu_cond cond;
+        if (!if_condition_alu ||
+            !ntq_emit_comparison(c, if_condition_alu, &cond)) {
+                vir_PF(c, ntq_get_src(c, if_stmt->condition, 0),
+                       V3D_QPU_PF_PUSHZ);
+                cond = V3D_QPU_COND_IFNA;
+        }
+
+        /* Update the flags+cond to mean "Taking the ELSE branch (!cond) and
+         * was previously active (execute Z) for updating the exec flags.
          */
-        vir_PF(c, vir_OR(c,
-                         c->execute,
-                         ntq_get_src(c, if_stmt->condition, 0)),
-                V3D_QPU_PF_PUSHZ);
-        vir_MOV_cond(c, V3D_QPU_COND_IFA,
+        if (was_top_level) {
+                cond = v3d_qpu_cond_invert(cond);
+        } else {
+                struct qinst *inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0),
+                                                  c->execute);
+                if (cond == V3D_QPU_COND_IFA) {
+                        vir_set_uf(inst, V3D_QPU_UF_NORNZ);
+                } else {
+                        vir_set_uf(inst, V3D_QPU_UF_ANDZ);
+                        cond = V3D_QPU_COND_IFA;
+                }
+        }
+
+        vir_MOV_cond(c, cond,
                      c->execute,
                      vir_uniform_ui(c, else_block->index));
 
index c7f2f148ac02e85bd7bcb34508ff4446aadd472b..717d85890b0ebb4d8f64872bf2f265581de805ee 100644 (file)
@@ -751,6 +751,7 @@ struct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst);
 struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst);
 void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond);
 void vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf);
+void vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf);
 void vir_set_unpack(struct qinst *inst, int src,
                     enum v3d_qpu_input_unpack unpack);
 
index f49140bcb909cc32f0d074c0a7b2f83713783090..c553e72461670ec91c4cd1c111724bc5f4449168 100644 (file)
@@ -291,6 +291,17 @@ vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
         }
 }
 
+void
+vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf)
+{
+        if (vir_is_add(inst)) {
+                inst->qpu.flags.auf = uf;
+        } else {
+                assert(vir_is_mul(inst));
+                inst->qpu.flags.muf = uf;
+        }
+}
+
 #if 0
 uint8_t
 vir_channels_written(struct qinst *inst)
index 338a1887f031e7c45b951a412d53d2cb50e0fad0..add2d2a23c8783c16490f4db1f95a4a5261b44e4 100644 (file)
@@ -499,6 +499,23 @@ v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
                 return 0;
 }
 
+enum v3d_qpu_cond
+v3d_qpu_cond_invert(enum v3d_qpu_cond cond)
+{
+        switch (cond) {
+        case V3D_QPU_COND_IFA:
+                return V3D_QPU_COND_IFNA;
+        case V3D_QPU_COND_IFNA:
+                return V3D_QPU_COND_IFA;
+        case V3D_QPU_COND_IFB:
+                return V3D_QPU_COND_IFNB;
+        case V3D_QPU_COND_IFNB:
+                return V3D_QPU_COND_IFB;
+        default:
+                unreachable("Non-invertible cond");
+        }
+}
+
 bool
 v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
 {
index 0da61fbb5d51769e20f463bb629f24e104e0dc56..1e2dcb78af6e9179f07e360b59e263c80c739044 100644 (file)
@@ -398,6 +398,8 @@ const char *v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack);
 const char *v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond);
 const char *v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign);
 
+enum v3d_qpu_cond v3d_qpu_cond_invert(enum v3d_qpu_cond cond) ATTRIBUTE_CONST;
+
 bool v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op);
 bool v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op);
 int v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op);