v3d: Fix copy-propagation of input unpacks.
author Eric Anholt <eric@anholt.net>
Mon, 4 Feb 2019 18:35:40 +0000 (10:35 -0800)
committer Eric Anholt <eric@anholt.net>
Tue, 5 Feb 2019 23:46:04 +0000 (15:46 -0800)
I had a single function for "does this do float input unpacking" with two
major flaws: It was missing the most common thing to try to copy propagate
an f32 input unpack to (the VFPACK to an FP16 render target) along with
several other ALU ops, and also would try to propagate an f32 unpack into
a VFMUL which only does f16 unpacks.

instructions in affected programs: 659232 -> 655895 (-0.51%)
uniforms in affected programs: 132613 -> 135336 (2.05%)

and a couple of programs increase their thread counts.

The uniforms hit appears to be a pattern in generated code of doing (-a >=
a) comparisons, which when a is abs(b) can result in the abs instruction
being copy propagated once but not fully DCEed.

src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/vir.c
src/broadcom/compiler/vir_opt_copy_propagate.c
src/broadcom/qpu/qpu_instr.c
src/broadcom/qpu/qpu_instr.h

index cea26484a8fcb7582282cbc77e4f99a39fce2bfe..1b6d2e7c2dced0e2e451be76f90cfa32714a57f2 100644 (file)
@@ -787,7 +787,6 @@ bool vir_is_raw_mov(struct qinst *inst);
 bool vir_is_tex(struct qinst *inst);
 bool vir_is_add(struct qinst *inst);
 bool vir_is_mul(struct qinst *inst);
-bool vir_is_float_input(struct qinst *inst);
 bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
 bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
 struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
index 10105fbd8611e9ea7b2ff91ddd40097dc372ffcb..077f9c1ecc9713c27f87435f59f6772efaeb4544 100644 (file)
@@ -132,38 +132,6 @@ vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
         return false;
 }
 
-bool
-vir_is_float_input(struct qinst *inst)
-{
-        /* XXX: More instrs */
-        switch (inst->qpu.type) {
-        case V3D_QPU_INSTR_TYPE_BRANCH:
-                return false;
-        case V3D_QPU_INSTR_TYPE_ALU:
-                switch (inst->qpu.alu.add.op) {
-                case V3D_QPU_A_FADD:
-                case V3D_QPU_A_FSUB:
-                case V3D_QPU_A_FMIN:
-                case V3D_QPU_A_FMAX:
-                case V3D_QPU_A_FTOIN:
-                        return true;
-                default:
-                        break;
-                }
-
-                switch (inst->qpu.alu.mul.op) {
-                case V3D_QPU_M_FMOV:
-                case V3D_QPU_M_VFMUL:
-                case V3D_QPU_M_FMUL:
-                        return true;
-                default:
-                        break;
-                }
-        }
-
-        return false;
-}
-
 bool
 vir_is_raw_mov(struct qinst *inst)
 {
index 2a22a1b552138f870ce313349b6822dff27de141..dc35701e3db18ece677ca47671820d500494d57c 100644 (file)
@@ -151,13 +151,36 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
                          * would be the same between the two
                          * instructions.
                          */
-                        if (vir_is_float_input(inst) !=
-                            vir_is_float_input(mov)) {
+                        if (v3d_qpu_unpacks_f32(&inst->qpu) !=
+                            v3d_qpu_unpacks_f32(&mov->qpu) ||
+                            v3d_qpu_unpacks_f16(&inst->qpu) !=
+                            v3d_qpu_unpacks_f16(&mov->qpu)) {
                                 continue;
                         }
+
                         /* No composing the unpacks. */
                         if (vir_has_unpack(inst, i))
                             continue;
+
+                        /* these ops can't represent abs. */
+                        if (mov->qpu.alu.mul.a_unpack == V3D_QPU_UNPACK_ABS) {
+                                switch (inst->qpu.alu.add.op) {
+                                case V3D_QPU_A_VFPACK:
+                                case V3D_QPU_A_FROUND:
+                                case V3D_QPU_A_FTRUNC:
+                                case V3D_QPU_A_FFLOOR:
+                                case V3D_QPU_A_FCEIL:
+                                case V3D_QPU_A_FDX:
+                                case V3D_QPU_A_FDY:
+                                case V3D_QPU_A_FTOIN:
+                                case V3D_QPU_A_FTOIZ:
+                                case V3D_QPU_A_FTOUZ:
+                                case V3D_QPU_A_FTOC:
+                                        continue;
+                                default:
+                                        break;
+                                }
+                        }
                 }
 
                 if (debug) {
index add2d2a23c8783c16490f4db1f95a4a5261b44e4..12a9c32c83108a2dabd697198adcd54bffdf2de7 100644 (file)
@@ -867,3 +867,70 @@ v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
 
         return false;
 }
+
+bool
+v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
+{
+        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        switch (inst->alu.add.op) {
+        case V3D_QPU_A_FADD:
+        case V3D_QPU_A_FADDNF:
+        case V3D_QPU_A_FSUB:
+        case V3D_QPU_A_FMIN:
+        case V3D_QPU_A_FMAX:
+        case V3D_QPU_A_FCMP:
+        case V3D_QPU_A_FROUND:
+        case V3D_QPU_A_FTRUNC:
+        case V3D_QPU_A_FFLOOR:
+        case V3D_QPU_A_FCEIL:
+        case V3D_QPU_A_FDX:
+        case V3D_QPU_A_FDY:
+        case V3D_QPU_A_FTOIN:
+        case V3D_QPU_A_FTOIZ:
+        case V3D_QPU_A_FTOUZ:
+        case V3D_QPU_A_FTOC:
+        case V3D_QPU_A_VFPACK:
+                return true;
+                break;
+        default:
+                break;
+        }
+
+        switch (inst->alu.mul.op) {
+        case V3D_QPU_M_FMOV:
+        case V3D_QPU_M_FMUL:
+                return true;
+                break;
+        default:
+                break;
+        }
+
+        return false;
+}
+bool
+v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
+{
+        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        switch (inst->alu.add.op) {
+        case V3D_QPU_A_VFMIN:
+        case V3D_QPU_A_VFMAX:
+                return true;
+                break;
+        default:
+                break;
+        }
+
+        switch (inst->alu.mul.op) {
+        case V3D_QPU_M_VFMUL:
+                return true;
+                break;
+        default:
+                break;
+        }
+
+        return false;
+}
index 1e2dcb78af6e9179f07e360b59e263c80c739044..a77430ff8827e76211d5dafe8b57b3ed6c8c3f8a 100644 (file)
@@ -464,5 +464,7 @@ bool v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
                                 const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
+bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
+bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 
 #endif