* Find/group instruction neighbors:
*/
-/* bleh.. we need to do the same group_n() thing for both inputs/outputs
- * (where we have a simple instr[] array), and fanin nodes (where we have
- * an extra indirection via reg->instr).
- */
-struct group_ops {
- struct ir3_instruction *(*get)(void *arr, int idx);
- void (*insert_mov)(void *arr, int idx, struct ir3_instruction *instr);
-};
-
-static struct ir3_instruction *arr_get(void *arr, int idx)
-{
- return ((struct ir3_instruction **)arr)[idx];
-}
-static void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr)
-{
- ((struct ir3_instruction **)arr)[idx] =
- ir3_MOV(instr->block, instr, TYPE_F32);
-}
-static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr)
+static void
+insert_mov(struct ir3_instruction *collect, int idx) /* idx is the collect source index; it maps to collect->regs[idx+1] */
{
- /* so, we can't insert a mov in front of a meta:in.. and the downstream
- * instruction already has a pointer to 'instr'. So we cheat a bit and
- * morph the meta:in instruction into a mov and insert a new meta:in
- * in front.
- */
- struct ir3_instruction *in;
-
- debug_assert(instr->regs_count == 1);
-
- in = ir3_instr_create(instr->block, OPC_META_INPUT);
- in->input.sysval = instr->input.sysval;
- __ssa_dst(in);
+ struct ir3_instruction *src = ssa(collect->regs[idx+1]); /* must be non-NULL: src->block is dereferenced just below */
+ struct ir3_instruction *mov = ir3_MOV(src->block, src,
+ (collect->regs[idx+1]->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32); /* mov type follows the IR3_REG_HALF flag of the collect source register */
- /* create src reg for meta:in and fixup to now be a mov: */
- __ssa_src(instr, in, 0);
- instr->opc = OPC_MOV;
- instr->cat1.src_type = TYPE_F32;
- instr->cat1.dst_type = TYPE_F32;
+ collect->regs[idx+1]->instr = mov; /* the collect now reads the mov instead of src */
- ((struct ir3_instruction **)arr)[idx] = in;
-}
-static struct group_ops arr_ops_out = { arr_get, arr_insert_mov_out };
-static struct group_ops arr_ops_in = { arr_get, arr_insert_mov_in };
-
-static struct ir3_instruction *instr_get(void *arr, int idx)
-{
- return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
-}
-static void
-instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
-{
- ((struct ir3_instruction *)arr)->regs[idx+1]->instr =
- ir3_MOV(instr->block, instr, TYPE_F32);
+ /* if collect and src are in the same block, move the inserted mov
+ * to just before the collect to avoid a use-before-def. Otherwise
+ * it should be safe to leave at the end of the block it is in:
+ * (NOTE(review): this relies on ir3_MOV() placing the new mov at
+ * the end of src->block; that helper is not visible here, confirm.)
+ */
+ if (src->block == collect->block) {
+ ir3_instr_move_before(mov, collect);
+ }
}
-static struct group_ops instr_ops = { instr_get, instr_insert_mov };
/* verify that cur != instr, but cur is also not in instr's neighbor-list: */
static bool
}
static void
-group_n(struct group_ops *ops, void *arr, unsigned n)
+group_collect(struct ir3_instruction *collect)
{
- unsigned i, j;
+ struct ir3_register **regs = &collect->regs[1];
+ unsigned n = collect->regs_count - 1;
/* first pass, figure out what has conflicts and needs a mov
* inserted. Do this up front, before starting to setup
* a mov.
*/
restart:
- for (i = 0; i < n; i++) {
- struct ir3_instruction *instr = ops->get(arr, i);
+ for (unsigned i = 0; i < n; i++) {
+ struct ir3_instruction *instr = ssa(regs[i]);
if (instr) {
- struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
- struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
+ struct ir3_instruction *left = (i > 0) ? ssa(regs[i - 1]) : NULL;
+ struct ir3_instruction *right = (i < (n-1)) ? ssa(regs[i + 1]) : NULL;
bool conflict;
/* check for left/right neighbor conflicts: */
conflict = true;
/* we also can't have an instr twice in the group: */
- for (j = i + 1; (j < n) && !conflict; j++)
- if (in_neighbor_list(ops->get(arr, j), instr, i))
+ for (unsigned j = i + 1; (j < n) && !conflict; j++)
+ if (in_neighbor_list(ssa(regs[j]), instr, i))
conflict = true;
if (conflict) {
- ops->insert_mov(arr, i, instr);
+ insert_mov(collect, i);
/* inserting the mov may have caused a conflict
* against the previous:
*/
* neighbors. This is guaranteed to succeed, since by definition
* the newly inserted mov's cannot conflict with anything.
*/
- for (i = 0; i < n; i++) {
- struct ir3_instruction *instr = ops->get(arr, i);
+ for (unsigned i = 0; i < n; i++) {
+ struct ir3_instruction *instr = ssa(regs[i]);
if (instr) {
- struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
- struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
+ struct ir3_instruction *left = (i > 0) ? ssa(regs[i - 1]) : NULL;
+ struct ir3_instruction *right = (i < (n-1)) ? ssa(regs[i + 1]) : NULL;
debug_assert(!conflicts(instr->cp.left, left));
if (left) {
}
}
-static void
+static bool /* returns true when this walk visited at least one OPC_META_COLLECT, false otherwise */
instr_find_neighbors(struct ir3_instruction *instr)
{
- struct ir3_instruction *src;
+ bool progress = false; /* accumulated over this instruction and all of its SSA sources */
if (ir3_instr_check_mark(instr))
- return;
-
- if (instr->opc == OPC_META_COLLECT)
- group_n(&instr_ops, instr, instr->regs_count - 1);
-
- foreach_ssa_src(src, instr)
- instr_find_neighbors(src);
-}
-
-/* a bit of sadness.. we can't have "holes" in inputs from PoV of
- * register assignment, they still need to be grouped together. So
- * we need to insert dummy/padding instruction for grouping, and
- * then take it back out again before anyone notices.
- */
-static void
-pad_and_group_input(struct ir3_instruction **input, unsigned n)
-{
- int i, mask = 0;
- struct ir3_block *block = NULL;
+ return false; /* presumably instr was already visited in this pass (mark set), so nothing new to report; confirm against ir3_instr_check_mark() */
- for (i = n - 1; i >= 0; i--) {
- struct ir3_instruction *instr = input[i];
- if (instr) {
- block = instr->block;
- } else if (block) {
- instr = ir3_NOP(block);
- __ssa_dst(instr); /* dummy dst */
- input[i] = instr;
- mask |= (1 << i);
- }
+ if (instr->opc == OPC_META_COLLECT) {
+ group_collect(instr);
+ progress = true; /* set for every collect visited: group_collect() returns void, so this does not tell whether a mov was actually inserted */
}
- group_n(&arr_ops_in, input, n);
+ foreach_ssa_src (src, instr)
+ progress |= instr_find_neighbors(src); /* recurse into each SSA source of instr */
- for (i = 0; i < n; i++) {
- if (mask & (1 << i))
- input[i] = NULL;
- }
+ return progress;
}
-static void
+static bool /* returns the OR of every instr_find_neighbors() walk started below */
find_neighbors(struct ir3 *ir)
{
+ bool progress = false; /* becomes true as soon as any walk reports progress */
unsigned i;
- /* shader inputs/outputs themselves must be contiguous as well:
- *
- * NOTE: group inputs first, since we only insert mov's
- * *before* the conflicted instr (and that would go badly
- * for inputs). By doing inputs first, we should never
- * have a conflict on inputs.. pushing any conflict to
- * resolve to the outputs, for stuff like:
- *
- * MOV OUT[n], IN[m].wzyx
- *
- * NOTE: we assume here inputs/outputs are grouped in vec4.
- * This logic won't quite cut it if we don't align smaller
- * on vec4 boundaries
- */
- for (i = 0; i < ir->ninputs; i += 4)
- pad_and_group_input(&ir->inputs[i], 4);
- for (i = 0; i < ir->noutputs; i += 4)
- group_n(&arr_ops_out, &ir->outputs[i], 4);
+ foreach_output (out, ir)
+ progress |= instr_find_neighbors(out); /* first set of roots: the shader outputs; inputs/outputs are no longer grouped in vec4 chunks here */
- for (i = 0; i < ir->noutputs; i++) {
- if (ir->outputs[i]) {
- struct ir3_instruction *instr = ir->outputs[i];
- instr_find_neighbors(instr);
- }
- }
-
- list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
+ foreach_block (block, &ir->block_list) {
for (i = 0; i < block->keeps_count; i++) {
struct ir3_instruction *instr = block->keeps[i];
- instr_find_neighbors(instr);
+ progress |= instr_find_neighbors(instr); /* second set of roots: every entry of block->keeps[] */
}
/* We also need to account for if-condition: */
if (block->condition)
- instr_find_neighbors(block->condition);
+ progress |= instr_find_neighbors(block->condition); /* third set of roots: the block condition, when present */
}
+
+ return progress;
}
-void
+bool /* pass entry point: the return type changes from void to the progress flag of find_neighbors() */
ir3_group(struct ir3 *ir)
{
ir3_clear_mark(ir);
- find_neighbors(ir);
+ return find_neighbors(ir); /* marks were cleared just above, before the walk that checks them starts */
}