From: Eric Anholt Date: Mon, 4 Feb 2019 18:35:40 +0000 (-0800) Subject: v3d: Fix copy-propagation of input unpacks. X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=940501a446ea954a4437c49d0683e5e7f9681bad v3d: Fix copy-propagation of input unpacks. I had a single function for "does this do float input unpacking" with two major flaws: It was missing the most common thing to try to copy propagate a f32 input nunpack to (the VFPACK to an FP16 render target) along with several other ALU ops, and also would try to propagate an f32 unpack into a VFMUL which only does f16 unpacks. instructions in affected programs: 659232 -> 655895 (-0.51%) uniforms in affected programs: 132613 -> 135336 (2.05%) and a couple of programs increase their thread counts. The uniforms hit appears to be a pattern in generated code of doing (-a >= a) comparisons, which when a is abs(b) can result in the abs instruction being copy propagated once but not fully DCEed. --- diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index cea26484a8f..1b6d2e7c2dc 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -787,7 +787,6 @@ bool vir_is_raw_mov(struct qinst *inst); bool vir_is_tex(struct qinst *inst); bool vir_is_add(struct qinst *inst); bool vir_is_mul(struct qinst *inst); -bool vir_is_float_input(struct qinst *inst); bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst); bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst); struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 10105fbd861..077f9c1ecc9 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -132,38 +132,6 @@ vir_has_side_effects(struct v3d_compile *c, struct qinst *inst) return false; } -bool -vir_is_float_input(struct qinst *inst) -{ - /* XXX: More instrs */ - switch (inst->qpu.type) { - case V3D_QPU_INSTR_TYPE_BRANCH: - return false; - case V3D_QPU_INSTR_TYPE_ALU: - switch (inst->qpu.alu.add.op) { - case V3D_QPU_A_FADD: - case V3D_QPU_A_FSUB: - case V3D_QPU_A_FMIN: - case V3D_QPU_A_FMAX: - case V3D_QPU_A_FTOIN: - return true; - default: - break; - } - - switch (inst->qpu.alu.mul.op) { - case V3D_QPU_M_FMOV: - case V3D_QPU_M_VFMUL: - case V3D_QPU_M_FMUL: - return true; - default: - break; - } - } - - return false; -} - bool vir_is_raw_mov(struct qinst *inst) { diff --git a/src/broadcom/compiler/vir_opt_copy_propagate.c b/src/broadcom/compiler/vir_opt_copy_propagate.c index 2a22a1b5521..dc35701e3db 100644 --- a/src/broadcom/compiler/vir_opt_copy_propagate.c +++ b/src/broadcom/compiler/vir_opt_copy_propagate.c @@ -151,13 +151,36 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs) * would be the same between the two * instructions. */ - if (vir_is_float_input(inst) != - vir_is_float_input(mov)) { + if (v3d_qpu_unpacks_f32(&inst->qpu) != + v3d_qpu_unpacks_f32(&mov->qpu) || + v3d_qpu_unpacks_f16(&inst->qpu) != + v3d_qpu_unpacks_f16(&mov->qpu)) { continue; } + /* No composing the unpacks. */ if (vir_has_unpack(inst, i)) continue; + + /* these ops can't represent abs. */ + if (mov->qpu.alu.mul.a_unpack == V3D_QPU_UNPACK_ABS) { + switch (inst->qpu.alu.add.op) { + case V3D_QPU_A_VFPACK: + case V3D_QPU_A_FROUND: + case V3D_QPU_A_FTRUNC: + case V3D_QPU_A_FFLOOR: + case V3D_QPU_A_FCEIL: + case V3D_QPU_A_FDX: + case V3D_QPU_A_FDY: + case V3D_QPU_A_FTOIN: + case V3D_QPU_A_FTOIZ: + case V3D_QPU_A_FTOUZ: + case V3D_QPU_A_FTOC: + continue; + default: + break; + } + } } if (debug) { diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c index add2d2a23c8..12a9c32c831 100644 --- a/src/broadcom/qpu/qpu_instr.c +++ b/src/broadcom/qpu/qpu_instr.c @@ -867,3 +867,70 @@ v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) return false; } + +bool +v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) +{ + if (inst->type != V3D_QPU_INSTR_TYPE_ALU) + return false; + + switch (inst->alu.add.op) { + case V3D_QPU_A_FADD: + case V3D_QPU_A_FADDNF: + case V3D_QPU_A_FSUB: + case V3D_QPU_A_FMIN: + case V3D_QPU_A_FMAX: + case V3D_QPU_A_FCMP: + case V3D_QPU_A_FROUND: + case V3D_QPU_A_FTRUNC: + case V3D_QPU_A_FFLOOR: + case V3D_QPU_A_FCEIL: + case V3D_QPU_A_FDX: + case V3D_QPU_A_FDY: + case V3D_QPU_A_FTOIN: + case V3D_QPU_A_FTOIZ: + case V3D_QPU_A_FTOUZ: + case V3D_QPU_A_FTOC: + case V3D_QPU_A_VFPACK: + return true; + break; + default: + break; + } + + switch (inst->alu.mul.op) { + case V3D_QPU_M_FMOV: + case V3D_QPU_M_FMUL: + return true; + break; + default: + break; + } + + return false; +} +bool +v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) +{ + if (inst->type != V3D_QPU_INSTR_TYPE_ALU) + return false; + + switch (inst->alu.add.op) { + case V3D_QPU_A_VFMIN: + case V3D_QPU_A_VFMAX: + return true; + break; + default: + break; + } + + switch (inst->alu.mul.op) { + case V3D_QPU_M_VFMUL: + return true; + break; + default: + break; + } + + return false; +} diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h index 1e2dcb78af6..a77430ff882 100644 --- a/src/broadcom/qpu/qpu_instr.h +++ b/src/broadcom/qpu/qpu_instr.h @@ -464,5 +464,7 @@ bool v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST; +bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; +bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; #endif