From 3022b4bd82c1f142aa0bfbe754e771f0fa0f26e6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 13 Feb 2019 12:09:02 -0800 Subject: [PATCH] v3d: Kill off vir_PF(), which is hard to use right. You were allowed to pass in any old temp so that you could hopefully fold the PF up into the def of the temp. If we couldn't find one, it implicitly generated a MOV(nop, reg). However, that PF could have different behavior depending on whether the def being folded into was a float or int opcode, which the caller doesn't necessarily control. Due to the fragility of the function, just switch all callers over to vir_set_pf(). This also encourages the callers to use a _dest call for the inst they're putting the PF on, eliminating a bunch of temps in the pre-optimization VIR. shader-db says the change is in the noise: total instructions in shared programs: 6226247 -> 6227184 (0.02%) instructions in affected programs: 851068 -> 852005 (0.11%) --- src/broadcom/compiler/nir_to_vir.c | 59 +++++++++++++++++----------- src/broadcom/compiler/v3d_compiler.h | 2 - src/broadcom/compiler/vir.c | 45 --------------------- 3 files changed, 36 insertions(+), 70 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index c0a606d8ef5..8dee69e90de 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -283,8 +283,10 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr, instr->num_components - 2); } - if (c->execute.file != QFILE_NULL) - vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ); + if (c->execute.file != QFILE_NULL) { + vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute), + V3D_QPU_PF_PUSHZ); + } struct qreg dest; if (config == ~0) @@ -398,7 +400,8 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan, /* Set the flags to the current exec mask. */ c->cursor = vir_before_inst(last_inst); - vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute), + V3D_QPU_PF_PUSHZ); c->cursor = vir_after_inst(last_inst); vir_set_cond(last_inst, V3D_QPU_COND_IFA); @@ -540,9 +543,9 @@ ntq_fsign(struct v3d_compile *c, struct qreg src) struct qreg t = vir_get_temp(c); vir_MOV_dest(c, t, vir_uniform_f(c, 0.0)); - vir_PF(c, vir_FMOV(c, src), V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_FMOV_dest(c, vir_nop_reg(), src), V3D_QPU_PF_PUSHZ); vir_MOV_cond(c, V3D_QPU_COND_IFNA, t, vir_uniform_f(c, 1.0)); - vir_PF(c, vir_FMOV(c, src), V3D_QPU_PF_PUSHN); + vir_set_pf(vir_FMOV_dest(c, vir_nop_reg(), src), V3D_QPU_PF_PUSHN); vir_MOV_cond(c, V3D_QPU_COND_IFA, t, vir_uniform_f(c, -1.0)); return vir_MOV(c, t); } @@ -789,7 +792,8 @@ ntq_emit_bool_to_cond(struct v3d_compile *c, nir_src src) return cond; out: - vir_PF(c, ntq_get_src(c, src, 0), V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), ntq_get_src(c, src, 0)), + V3D_QPU_PF_PUSHZ); return V3D_QPU_COND_IFNA; } @@ -873,7 +877,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr) break; case nir_op_i2b32: case nir_op_f2b32: - vir_PF(c, src[0], V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), src[0]), + V3D_QPU_PF_PUSHZ); result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFNA, vir_uniform_ui(c, ~0), vir_uniform_ui(c, 0))); @@ -967,7 +972,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr) break; case nir_op_fcsel: - vir_PF(c, src[0], V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), src[0]), + V3D_QPU_PF_PUSHZ); result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFNA, src[1], src[2])); break; @@ -1033,7 +1039,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr) break; case nir_op_uadd_carry: - vir_PF(c, vir_ADD(c, src[0], src[1]), V3D_QPU_PF_PUSHC); + vir_set_pf(vir_ADD_dest(c, vir_nop_reg(), src[0], src[1]), + V3D_QPU_PF_PUSHC); result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA, vir_uniform_ui(c, ~0), vir_uniform_ui(c, 0))); @@ -1832,7 +1839,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) break; case nir_intrinsic_load_helper_invocation: - vir_PF(c, vir_MSF(c), V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ); ntq_store_dest(c, &instr->dest, 0, vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA, vir_uniform_ui(c, ~0), @@ -1886,7 +1893,8 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_discard: if (c->execute.file != QFILE_NULL) { - vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute), + V3D_QPU_PF_PUSHZ); vir_set_cond(vir_SETMSF_dest(c, vir_nop_reg(), vir_uniform_ui(c, 0)), V3D_QPU_COND_IFA); @@ -2103,7 +2111,7 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt) /* Jump to ELSE if nothing is active for THEN, otherwise fall * through. */ - vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute), V3D_QPU_PF_PUSHZ); vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALLNA); vir_link_blocks(c->cur_block, else_block); vir_link_blocks(c->cur_block, then_block); @@ -2117,14 +2125,16 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt) * active channels update their execute flags to point to * ENDIF */ - vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute), + V3D_QPU_PF_PUSHZ); vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute, vir_uniform_ui(c, after_block->index)); /* If everything points at ENDIF, then jump there immediately. */ - vir_PF(c, vir_XOR(c, c->execute, - vir_uniform_ui(c, after_block->index)), - V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_XOR_dest(c, vir_nop_reg(), + c->execute, + vir_uniform_ui(c, after_block->index)), + V3D_QPU_PF_PUSHZ); vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALLA); vir_link_blocks(c->cur_block, after_block); vir_link_blocks(c->cur_block, else_block); @@ -2159,13 +2169,15 @@ ntq_emit_jump(struct v3d_compile *c, nir_jump_instr *jump) { switch (jump->type) { case nir_jump_break: - vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute), + V3D_QPU_PF_PUSHZ); vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute, vir_uniform_ui(c, c->loop_break_block->index)); break; case nir_jump_continue: - vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute), + V3D_QPU_PF_PUSHZ); vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute, vir_uniform_ui(c, c->loop_cont_block->index)); break; @@ -2251,13 +2263,14 @@ ntq_emit_loop(struct v3d_compile *c, nir_loop *loop) * * XXX: Use the .ORZ flags update, instead. */ - vir_PF(c, vir_XOR(c, - c->execute, - vir_uniform_ui(c, c->loop_cont_block->index)), - V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_XOR_dest(c, + vir_nop_reg(), + c->execute, + vir_uniform_ui(c, c->loop_cont_block->index)), + V3D_QPU_PF_PUSHZ); vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute, vir_uniform_ui(c, 0)); - vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ); + vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute), V3D_QPU_PF_PUSHZ); struct qinst *branch = vir_BRANCH(c, V3D_QPU_BRANCH_COND_ANYA); /* Pixels that were not dispatched or have been discarded should not diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index bb4b5d0b3b2..d29579b8f35 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -837,8 +837,6 @@ bool vir_init_reg_sets(struct v3d_compiler *compiler); bool v3d_gl_format_is_return_32(GLenum format); -void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf); - static inline bool quniform_contents_is_texture_p0(enum quniform_contents contents) { diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 628466a6765..c0fb6b92c22 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -1059,51 +1059,6 @@ vir_uniform(struct v3d_compile *c, return vir_reg(QFILE_UNIF, uniform); } -static bool -vir_can_set_flags(struct v3d_compile *c, struct qinst *inst) -{ - if (c->devinfo->ver >= 40 && (v3d_qpu_reads_vpm(&inst->qpu) || - v3d_qpu_uses_sfu(&inst->qpu))) { - return false; - } - - if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || - (inst->qpu.alu.add.op == V3D_QPU_A_NOP && - inst->qpu.alu.mul.op == V3D_QPU_M_NOP)) { - return false; - } - - return true; -} - -void -vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf) -{ - struct qinst *last_inst = NULL; - - if (!list_empty(&c->cur_block->instructions)) { - last_inst = (struct qinst *)c->cur_block->instructions.prev; - - /* Can't stuff the PF into the last last inst if our cursor - * isn't pointing after it. - */ - struct vir_cursor after_inst = vir_after_inst(last_inst); - if (c->cursor.mode != after_inst.mode || - c->cursor.link != after_inst.link) - last_inst = NULL; - } - - if (src.file != QFILE_TEMP || - !c->defs[src.index] || - last_inst != c->defs[src.index] || - !vir_can_set_flags(c, last_inst)) { - /* XXX: Make the MOV be the appropriate type */ - last_inst = vir_MOV_dest(c, vir_nop_reg(), src); - } - - vir_set_pf(last_inst, pf); -} - #define OPTPASS(func) \ do { \ bool stage_progress = func(c); \ -- 2.30.2