--- /dev/null
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_opt_vpm_writes.c
+ *
+ * This modifies instructions that generate the value consumed by a VPM write
+ * to write directly into the VPM.
+ */
+
+#include "vc4_qir.h"
+
+/**
+ * Folds "MOV <vpm>, <temp>" instructions into the instruction that produces
+ * the temp, so that the producer writes to the VPM directly.
+ *
+ * A VPM write is folded only when it is a QOP_MOV from a temp that has
+ * exactly one use, and the temp's defining instruction is not a
+ * multi-instruction op, does not depend on flags, has no side effects, and
+ * does not read a temp defined by QOP_TEX_RESULT.
+ *
+ * @param c  Compile state whose instruction list is scanned and modified.
+ * @return true if at least one VPM write was folded, false otherwise.  Always
+ *         false when c->stage is QSTAGE_FRAG.
+ */
+bool
+qir_opt_vpm_writes(struct vc4_compile *c)
+{
+        if (c->stage == QSTAGE_FRAG)
+                return false;
+
+        /* Without temps there is nothing to fold, and the arrays below would
+         * be zero-length VLAs, which is undefined behavior.
+         */
+        if (c->num_temps == 0)
+                return false;
+
+        bool progress = false;
+        struct simple_node *node;
+        struct qinst *defs[c->num_temps];
+        struct qinst *vpm_writes[64] = { 0 };
+        const uint32_t max_vpm_writes =
+                sizeof(vpm_writes) / sizeof(vpm_writes[0]);
+        uint32_t use_count[c->num_temps];
+        uint32_t vpm_write_count = 0;
+        memset(defs, 0, sizeof(defs));
+        memset(use_count, 0, sizeof(use_count));
+
+        /* Pass 1: record the defining instruction and the number of uses of
+         * every temp, and collect the instructions that write to the VPM.
+         */
+        foreach(node, &c->instructions) {
+                struct qinst *inst = (struct qinst *)node;
+
+                switch (inst->dst.file) {
+                case QFILE_TEMP:
+                        defs[inst->dst.index] = inst;
+                        break;
+                case QFILE_VPM:
+                        /* Writes beyond the capacity of the table are simply
+                         * left unoptimized instead of overflowing the stack.
+                         */
+                        if (vpm_write_count < max_vpm_writes)
+                                vpm_writes[vpm_write_count++] = inst;
+                        break;
+                default:
+                        break;
+                }
+
+                for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+                        if (inst->src[i].file == QFILE_TEMP)
+                                use_count[inst->src[i].index]++;
+                }
+        }
+
+        /* Pass 2: try to fold each recorded VPM write into its producer. */
+        for (uint32_t i = 0; i < vpm_write_count; i++) {
+                struct qinst *vpm_write = vpm_writes[i];
+
+                if (vpm_write->op != QOP_MOV ||
+                    vpm_write->src[0].file != QFILE_TEMP) {
+                        continue;
+                }
+
+                uint32_t temp = vpm_write->src[0].index;
+                if (use_count[temp] != 1)
+                        continue;
+
+                /* A temp with no recorded definition cannot be folded. */
+                struct qinst *inst = defs[temp];
+                if (!inst)
+                        continue;
+
+                if (qir_is_multi_instruction(inst))
+                        continue;
+
+                if (qir_depends_on_flags(inst))
+                        continue;
+
+                if (qir_has_side_effects(c, inst))
+                        continue;
+
+                /* A QOP_TEX_RESULT destination is r4, so we can't move
+                 * accesses to it past another QOP_TEX_RESULT which would
+                 * update it.
+                 */
+                bool reads_tex_result = false;
+                for (int src = 0; src < qir_get_op_nsrc(inst->op); src++) {
+                        if (inst->src[src].file != QFILE_TEMP)
+                                continue;
+
+                        struct qinst *src_def = defs[inst->src[src].index];
+                        if (src_def && src_def->op == QOP_TEX_RESULT) {
+                                reads_tex_result = true;
+                                break;
+                        }
+                }
+                if (reads_tex_result)
+                        continue;
+
+                /* Reposition the generating instruction relative to the VPM
+                 * write being removed, to maintain the order of the VPM
+                 * writes.
+                 */
+                move_to_tail(&vpm_write->link, &inst->link);
+                qir_remove_instruction(vpm_write);
+
+                inst->dst.file = QFILE_VPM;
+                inst->dst.index = 0;
+
+                progress = true;
+        }
+
+        return progress;
+}
const char *name;
uint8_t ndst, nsrc;
bool has_side_effects;
+ bool multi_instruction;
};
static const struct qir_op_info qir_op_info[] = {
[QOP_NOT] = { "not", 1, 1 },
[QOP_SF] = { "sf", 0, 1 },
- [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1 },
- [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1 },
- [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1 },
- [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1 },
- [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2 },
- [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2 },
- [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2 },
- [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2 },
-
- [QOP_RCP] = { "rcp", 1, 1 },
- [QOP_RSQ] = { "rsq", 1, 1 },
- [QOP_EXP2] = { "exp2", 1, 2 },
- [QOP_LOG2] = { "log2", 1, 2 },
- [QOP_PACK_COLORS] = { "pack_colors", 1, 4 },
- [QOP_PACK_SCALED] = { "pack_scaled", 1, 2 },
+ [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1, false, true },
+ [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
+ [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
+ [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true },
+ [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true },
+ [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true },
+ [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true },
+ [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true },
+
+ [QOP_RCP] = { "rcp", 1, 1, false, true },
+ [QOP_RSQ] = { "rsq", 1, 1, false, true },
+ [QOP_EXP2] = { "exp2", 1, 2, false, true },
+ [QOP_LOG2] = { "log2", 1, 2, false, true },
+ [QOP_PACK_COLORS] = { "pack_colors", 1, 4, false, true },
+ [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
[QOP_VPM_READ] = { "vpm_read", 0, 1, true },
[QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
[QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
return qir_op_info[inst->op].has_side_effects;
}
+/**
+ * Reports whether the op of the given instruction is flagged as
+ * multi_instruction in the qir_op_info table.
+ *
+ * @param inst  Instruction whose op is looked up.
+ * @return the multi_instruction flag of the table entry for inst->op.
+ */
+bool
+qir_is_multi_instruction(struct qinst *inst)
+{
+        const struct qir_op_info *info = &qir_op_info[inst->op];
+
+        return info->multi_instruction;
+}
+
bool
qir_depends_on_flags(struct qinst *inst)
{
OPTPASS(qir_opt_copy_propagation);
OPTPASS(qir_opt_dead_code);
OPTPASS(qir_opt_small_immediates);
+ OPTPASS(qir_opt_vpm_writes);
if (!progress)
break;