From: Eric Anholt Date: Sat, 29 Dec 2018 00:31:07 +0000 (-0800) Subject: v3d: Fold comparisons for IF conditions into the flags for the IF. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5e9ee6e8419af5089809d4116022af2cb8ca9237;p=mesa.git v3d: Fold comparisons for IF conditions into the flags for the IF. total instructions in shared programs: 6193810 -> 6192844 (-0.02%) instructions in affected programs: 800373 -> 799407 (-0.12%) --- diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index e2cbae4174b..9d208530612 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1736,19 +1736,33 @@ ntq_emit_if(struct v3d_compile *c, nir_if *if_stmt) was_top_level = true; } - /* Set A for executing (execute == 0) and jumping (if->condition == - * 0) channels, and then update execute flags for those to point to - * the ELSE block. - * - * XXX perf: we could reuse ntq_emit_comparison() to generate our if - * condition, and the .uf field to ignore non-executing channels, to - * reduce the overhead of if statements. + /* Set up the flags for the IF condition (taking the THEN branch). */ + nir_alu_instr *if_condition_alu = ntq_get_alu_parent(if_stmt->condition); + enum v3d_qpu_cond cond; + if (!if_condition_alu || + !ntq_emit_comparison(c, if_condition_alu, &cond)) { + vir_PF(c, ntq_get_src(c, if_stmt->condition, 0), + V3D_QPU_PF_PUSHZ); + cond = V3D_QPU_COND_IFNA; + } + + /* Update the flags+cond to mean "Taking the ELSE branch (!cond) and + * was previously active (execute Z)" for updating the exec flags. 
*/ - vir_PF(c, vir_OR(c, - c->execute, - ntq_get_src(c, if_stmt->condition, 0)), - V3D_QPU_PF_PUSHZ); - vir_MOV_cond(c, V3D_QPU_COND_IFA, + if (was_top_level) { + cond = v3d_qpu_cond_invert(cond); + } else { + struct qinst *inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), + c->execute); + if (cond == V3D_QPU_COND_IFA) { + vir_set_uf(inst, V3D_QPU_UF_NORNZ); + } else { + vir_set_uf(inst, V3D_QPU_UF_ANDZ); + cond = V3D_QPU_COND_IFA; + } + } + + vir_MOV_cond(c, cond, c->execute, vir_uniform_ui(c, else_block->index)); diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index c7f2f148ac0..717d85890b0 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -751,6 +751,7 @@ struct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst); struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst); void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond); void vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf); +void vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf); void vir_set_unpack(struct qinst *inst, int src, enum v3d_qpu_input_unpack unpack); diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index f49140bcb90..c553e724616 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -291,6 +291,17 @@ vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf) } } +void +vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf) +{ + if (vir_is_add(inst)) { + inst->qpu.flags.auf = uf; + } else { + assert(vir_is_mul(inst)); + inst->qpu.flags.muf = uf; + } +} + #if 0 uint8_t vir_channels_written(struct qinst *inst) diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index 338a1887f03..add2d2a23c8 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -499,6 +499,23 @@ v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op) return 0; } +enum v3d_qpu_cond +v3d_qpu_cond_invert(enum v3d_qpu_cond cond) +{ + switch (cond) 
{ + case V3D_QPU_COND_IFA: + return V3D_QPU_COND_IFNA; + case V3D_QPU_COND_IFNA: + return V3D_QPU_COND_IFA; + case V3D_QPU_COND_IFB: + return V3D_QPU_COND_IFNB; + case V3D_QPU_COND_IFNB: + return V3D_QPU_COND_IFB; + default: + unreachable("Non-invertible cond"); + } +} + bool v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) { diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index 0da61fbb5d5..1e2dcb78af6 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -398,6 +398,8 @@ const char *v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack); const char *v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond); const char *v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign); +enum v3d_qpu_cond v3d_qpu_cond_invert(enum v3d_qpu_cond cond) ATTRIBUTE_CONST; + bool v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op); bool v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op); int v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op);