                 break;
         case QOP_FMUL:
-                if (replace_x_0_with_0(c, inst, 0) ||
-                    replace_x_0_with_0(c, inst, 1) ||
-                    fmul_replace_one(c, inst, 0) ||
-                    fmul_replace_one(c, inst, 1)) {
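+                /* Skip these rewrites when the dst carries a pack flag:
+                 * replacing the MUL with a MOV would drop the MUL-unit
+                 * pack operation.
+                 */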
+                if (!inst->dst.pack &&
+                    (replace_x_0_with_0(c, inst, 0) ||
+                     replace_x_0_with_0(c, inst, 1) ||
+                     fmul_replace_one(c, inst, 0) ||
+                     fmul_replace_one(c, inst, 1))) {
                         progress = true;
                         break;
                 }
                 break;
         case QOP_MUL24:
-                if (replace_x_0_with_0(c, inst, 0) ||
-                    replace_x_0_with_0(c, inst, 1)) {
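+                /* Same pack-flag guard as QOP_FMUL above. */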
+                if (!inst->dst.pack &&
+                    (replace_x_0_with_0(c, inst, 0) ||
+                     replace_x_0_with_0(c, inst, 1))) {
                         progress = true;
                         break;
                 }
         c->ubo_ranges[array_id].used = false;
 }
+static bool
+ntq_src_is_only_ssa_def_user(nir_src *src)
+{
+        if (!src->is_ssa)
+                return false;
+
+        if (!list_empty(&src->ssa->if_uses))
+                return false;
+
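+        /* The def's use list must contain exactly one entry, and that
+         * entry must be this src's use_link.
+         */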
+        return (src->ssa->uses.next == &src->use_link &&
+                src->ssa->uses.next->next == &src->ssa->uses);
+}
+
+/**
+ * In general, emits a nir_pack_unorm_4x8 as a series of MOVs with the pack
+ * bit set.
+ *
+ * However, as an optimization, it tries to find the instructions generating
+ * the sources to be packed and just emit the pack flag there, if possible.
+ */
+static void
+ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr)
+{
+        struct qreg result = qir_get_temp(c);
+        struct nir_alu_instr *vec4 = NULL;
+
+        /* If packing from a vec4 op (as expected), identify it so that we can
+         * peek back at what generated its sources.
+         */
+        if (instr->src[0].src.is_ssa &&
+            instr->src[0].src.ssa->parent_instr->type == nir_instr_type_alu &&
+            nir_instr_as_alu(instr->src[0].src.ssa->parent_instr)->op ==
+            nir_op_vec4) {
+                vec4 = nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
+        }
+
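+        /* Pack each of the four 8-bit channels, either by rewriting the
+         * instruction that generated the channel's value or by emitting a
+         * pack MOV of it.
+         */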
+        for (int i = 0; i < 4; i++) {
+                int swiz = instr->src[0].swizzle[i];
+                struct qreg src;
+                if (vec4) {
+                        src = ntq_get_src(c, vec4->src[swiz].src,
+                                          vec4->src[swiz].swizzle[0]);
+                } else {
+                        src = ntq_get_src(c, instr->src[0].src, swiz);
+                }
+
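+                /* If the channel's value comes from a MUL whose only user
+                 * is this pack and that isn't packing already, retarget
+                 * that MUL to write the packed channel of our result
+                 * directly instead of emitting a separate pack MOV.
+                 */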
+                if (vec4 &&
+                    ntq_src_is_only_ssa_def_user(&vec4->src[swiz].src) &&
+                    src.file == QFILE_TEMP &&
+                    c->defs[src.index] &&
+                    qir_is_mul(c->defs[src.index]) &&
+                    !c->defs[src.index]->dst.pack) {
+                        struct qinst *rewrite = c->defs[src.index];
+                        c->defs[src.index] = NULL;
+                        rewrite->dst = result;
+                        rewrite->dst.pack = QPU_PACK_MUL_8A + i;
+                        continue;
+                }
+
+                qir_PACK_8_F(c, result, src, i);
+        }
+
+        struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+        *dest = result;
+}
+
 static void
 ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
 {
         }
 
         if (instr->op == nir_op_pack_unorm_4x8) {
-                struct qreg result = qir_get_temp(c);
-
-                for (int i = 0; i < 4; i++) {
-                        qir_PACK_8_F(c, result,
-                                     ntq_get_src(c, instr->src[0].src,
-                                                 instr->src[0].swizzle[i]),
-                                     i);
-                }
-                struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
-                *dest = result;
+                ntq_emit_pack_unorm_4x8(c, instr);
                 return;
         }
                 inst->sf ? ".sf" : "");
         qir_print_reg(c, inst->dst, true);
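+        /* Print the destination's pack mode, if any, as a suffix on the
+         * dst register.
+         */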
+        if (inst->dst.pack) {
+                if (qir_is_mul(inst)) {
+                        switch (inst->dst.pack) {
+                        case QPU_PACK_MUL_8888:
+                                fprintf(stderr, ".8888");
+                                break;
+                        case QPU_PACK_MUL_8A:
+                                fprintf(stderr, ".8a");
+                                break;
+                        case QPU_PACK_MUL_8B:
+                                fprintf(stderr, ".8b");
+                                break;
+                        case QPU_PACK_MUL_8C:
+                                fprintf(stderr, ".8c");
+                                break;
+                        case QPU_PACK_MUL_8D:
+                                fprintf(stderr, ".8d");
+                                break;
+                        }
+                } else {
+                        unreachable("packs only set up for MULs so far.\n");
+                }
+        }
         for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
                 fprintf(stderr, ", ");
                 qir_print_reg(c, inst->src[i], false);