return inst;
}
/* Patch note: merge_fields() used to return the merged word and assert()
 * when both inputs carried different, non-"ignore" values.  It now writes
 * through *merge and reports such a conflict to the caller instead.
 *
 * New behavior, for the bits selected by mask:
 *  - if a's field equals ignore, b's field is written into *merge;
 *  - otherwise, if b's field equals ignore, a's field is written into *merge;
 *  - otherwise the two fields must be equal; if they differ, false is
 *    returned and *merge is left untouched.
 *
 * Returns true when the field could be merged.  When both fields are equal
 * and neither is ignore, *merge is not modified, so the value the caller
 * passed in must already contain that field.
 */
-static uint64_t
-merge_fields(uint64_t merge,
- uint64_t add, uint64_t mul,
+static bool
+merge_fields(uint64_t *merge,
+ uint64_t a, uint64_t b,
uint64_t mask, uint64_t ignore)
{
- if ((add & mask) == ignore)
- return (merge & ~mask) | (mul & mask);
- else if ((mul & mask) == ignore)
- return (merge & ~mask) | (add & mask);
- else {
- assert((add & mask) == (mul & mask));
- return merge;
+ if ((a & mask) == ignore) {
+ *merge = (*merge & ~mask) | (b & mask);
+ } else if ((b & mask) == ignore) {
+ *merge = (*merge & ~mask) | (a & mask);
+ } else {
+ if ((a & mask) != (b & mask))
+ return false;
}
+
+ return true;
}
/* Patch note: qpu_inst(add, mul) becomes qpu_merge_inst(a, b), and merging
 * can now fail instead of asserting.
 *
 * Attempts to combine instructions a and b into a single instruction word.
 * The result starts out as a | b; the QPU_SIG, QPU_SF/QPU_WS/QPU_PM,
 * QPU_RADDR_A/B and QPU_WADDR_ADD/MUL fields are then resolved one at a
 * time by merge_fields().
 *
 * Returns 0 when the two instructions cannot be paired:
 *  - both have a non-NOP add op, or
 *  - both have a non-NOP mul op, or
 *  - any merge_fields() call reports conflicting values.
 * Because of the "ok = ok && ..." chaining, no further merge_fields() calls
 * are made once one of them has failed.
 *
 * 0 doubles as the failure value, so callers must treat a zero result as
 * "not mergeable".
 */
uint64_t
-qpu_inst(uint64_t add, uint64_t mul)
+qpu_merge_inst(uint64_t a, uint64_t b)
{
- uint64_t merge = ((add & ~QPU_WADDR_MUL_MASK) |
- (mul & ~QPU_WADDR_ADD_MASK));
+ uint64_t merge = a | b;
+ bool ok = true;
+
+ if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
+ QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP)
+ return 0;
- merge = merge_fields(merge, add, mul, QPU_SIG_MASK,
- QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
+ if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&
+ QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
+ return 0;
- merge = merge_fields(merge, add, mul, QPU_RADDR_A_MASK,
- QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A));
- merge = merge_fields(merge, add, mul, QPU_RADDR_B_MASK,
- QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
+ ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,
+ QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
- return merge;
+ /* Misc fields that have to match exactly.
+ *
+ * NOTE(review): ~0 is passed as the "ignore" value; presumably no
+ * masked field can ever equal it, so merge_fields() only succeeds
+ * here when a and b agree on these bits -- confirm against the
+ * definitions of QPU_SF, QPU_WS and QPU_PM.
+ */
+ ok = ok && merge_fields(&merge, a, b, QPU_SF | QPU_WS | QPU_PM,
+ ~0);
+
+ ok = ok && merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
+ QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A));
+ ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK,
+ QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
+
+ ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK,
+ QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));
+ ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,
+ QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));
+
+ if (ok)
+ return merge;
+ else
+ return 0;
}
uint64_t
struct qpu_reg src0, struct qpu_reg src1);
uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst,
struct qpu_reg src0, struct qpu_reg src1);
/* Combines two QPU instructions into one instruction word; returns 0 when
 * the two instructions cannot be paired.
 */
-uint64_t qpu_inst(uint64_t add, uint64_t mul);
+uint64_t qpu_merge_inst(uint64_t a, uint64_t b);
uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val);
uint64_t qpu_set_sig(uint64_t inst, uint32_t sig);
uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond);
/* Selects a node from schedule_list (only part of this function is visible
 * in this patch; presumably it returns the best candidate, or NULL when
 * none qualifies -- confirm against the full source).
 *
 * prev_inst: pass 0 for an ordinary pick.  When non-zero, a candidate is
 * only considered if qpu_merge_inst() can combine it with prev_inst.
 */
static struct schedule_node *
choose_instruction_to_schedule(struct choose_scoreboard *scoreboard,
- struct simple_node *schedule_list)
+ struct simple_node *schedule_list,
+ uint64_t prev_inst)
{
struct schedule_node *chosen = NULL;
struct simple_node *node;
if (pixel_scoreboard_too_soon(scoreboard, inst))
continue;
+ /* If we're trying to pair with another instruction, check
+ * that they're compatible.
+ *
+ * NOTE(review): on success inst is overwritten with the merged
+ * word, so the priority computed below is that of the merged
+ * instruction rather than of the candidate on its own --
+ * confirm this is intended.
+ */
+ if (prev_inst != 0) {
+ inst = qpu_merge_inst(prev_inst, inst);
+ if (!inst)
+ continue;
+ }
+
int prio = get_instruction_priority(inst);
/* Found a valid instruction. If nothing better comes along,
}
}
/* Releases the children of a node that has just been scheduled.
 *
 * Each child's parent_count is decremented; a child whose count reaches
 * zero is inserted at the head of schedule_list.  Children are visited
 * from the last entry of node->children to the first.
 *
 * A NULL node is accepted and ignored, so a caller can pass an optional
 * node without checking it first.
 */
+static void
+mark_instruction_scheduled(struct simple_node *schedule_list,
+ struct schedule_node *node)
+{
+ if (!node)
+ return;
+
+ for (int i = node->child_count - 1; i >= 0; i--) {
+ struct schedule_node *child =
+ node->children[i];
+
+ child->parent_count--;
+ if (child->parent_count == 0)
+ insert_at_head(schedule_list, &child->link);
+ }
+}
+
/* Scheduling loop (only partly visible in this patch).  While schedule_list
 * is non-empty it picks a node, tries to pick a second node whose
 * instruction can be merged with the first, serializes the resulting
 * instruction, updates the scoreboard, and then releases the children of
 * both nodes through mark_instruction_scheduled().
 */
static void
schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
{
while (!is_empty_list(schedule_list)) {
struct schedule_node *chosen =
choose_instruction_to_schedule(&scoreboard,
- schedule_list);
+ schedule_list,
+ 0);
+ struct schedule_node *merge = NULL;
/* If there are no valid instructions to schedule, drop a NOP
* in.
dump_state(schedule_list);
fprintf(stderr, "chose: ");
vc4_qpu_disasm(&inst, 1);
- fprintf(stderr, "\n\n");
+ fprintf(stderr, "\n");
}
- /* Schedule this instruction onto the QPU list. */
- if (chosen)
+ /* Schedule this instruction onto the QPU list. Also try to
+ * find an instruction to pair with it.
+ *
+ * The second choose_instruction_to_schedule() call passes inst
+ * as prev_inst, so it should only return a node whose
+ * instruction qpu_merge_inst() already accepted; the assert
+ * below relies on that.  chosen is unlinked first, so it is no
+ * longer on schedule_list during that second call.
+ */
+ if (chosen) {
remove_from_list(&chosen->link);
+
+ merge = choose_instruction_to_schedule(&scoreboard,
+ schedule_list,
+ inst);
+ if (merge) {
+ remove_from_list(&merge->link);
+ inst = qpu_merge_inst(inst, merge->inst->inst);
+ assert(inst != 0);
+
+ if (debug) {
+ fprintf(stderr, "merging: ");
+ vc4_qpu_disasm(&merge->inst->inst, 1);
+ fprintf(stderr, "\n");
+ fprintf(stderr, "resulting in: ");
+ vc4_qpu_disasm(&inst, 1);
+ fprintf(stderr, "\n");
+ }
+ }
+ }
+
+ if (debug) {
+ fprintf(stderr, "\n");
+ }
+
qpu_serialize_one_inst(c, inst);
update_scoreboard_for_chosen(&scoreboard, inst);
* be scheduled. Update the children's unblocked time for this
* DAG edge as we do so.
*/
- if (chosen) {
- for (int i = chosen->child_count - 1; i >= 0; i--) {
- struct schedule_node *child =
- chosen->children[i];
-
- child->parent_count--;
- if (child->parent_count == 0) {
- insert_at_head(schedule_list,
- &child->link);
- }
- }
- }
+ mark_instruction_scheduled(schedule_list, chosen);
+ mark_instruction_scheduled(schedule_list, merge);
scoreboard.tick++;
}