From 876550ff97b9c97df02f9bf0e29198be963d8e89 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Mon, 12 Jan 2015 23:32:25 -0500
Subject: [PATCH] freedreno/ir3: handle "holes" in inputs

If, for example, only the x/y/w components of in.xyzw are actually used,
we still need to have a group of four registers and assign all four
components. The hardware can't write in.xy and in.w to discontiguous
registers. To handle this, pad with a dummy NOP instruction, to keep
the neighbor chain contiguous.

This fixes a problem noticed with firefox OMTC.

Signed-off-by: Rob Clark
---
 src/gallium/drivers/freedreno/ir3/ir3_group.c | 32 ++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c
index f215c1c15d2..da2142e69a8 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_group.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c
@@ -187,6 +187,36 @@ static void instr_find_neighbors(struct ir3_instruction *instr)
 	}
 }
 
+/* a bit of sadness.. we can't have "holes" in inputs from PoV of
+ * register assignment, they still need to be grouped together. So
+ * we need to insert dummy/padding instruction for grouping, and
+ * then take it back out again before anyone notices.
+ */
+static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
+{	/* input: array of n slots, NULL slots are the holes; n: slot count (caller passes 4) */
+	int i, mask = 0;	/* mask: bit i is set when input[i] was filled with a placeholder here */
+	struct ir3_block *block = NULL;	/* block of the nearest higher-indexed non-NULL input, once one is seen */
+
+	for (i = n - 1; i >= 0; i--) {	/* walk high to low: holes above the last real input stay NULL */
+		struct ir3_instruction *instr = input[i];
+		if (instr) {
+			block = instr->block;
+		} else if (block) {	/* a hole that sits below some real input: fill it */
+			instr = ir3_instr_create(block, 0, OPC_NOP);	/* placeholder NOP in that same block */
+			ir3_reg_create(instr, 0, IR3_REG_SSA);  /* dst */
+			input[i] = instr;
+			mask |= (1 << i);	/* remember this slot so it can be cleared again below */
+		}
+	}
+
+	group_n(&arr_ops_in, input, n);	/* group while the placeholders are in the array */
+
+	for (i = 0; i < n; i++) {	/* undo the padding in the array */
+		if (mask & (1 << i))
+			input[i] = NULL;	/* only the slot is reset; nothing here releases the NOP itself */
+	}
+}
+
 static void block_find_neighbors(struct ir3_block *block)
 {
 	unsigned i;
@@ -214,7 +244,7 @@ static void block_find_neighbors(struct ir3_block *block)
 	 * on vec4 boundaries
 	 */
 	for (i = 0; i < block->ninputs; i += 4)
-		group_n(&arr_ops_in, &block->inputs[i], 4);
+		pad_and_group_input(&block->inputs[i], 4);
 	for (i = 0; i < block->noutputs; i += 4)
 		group_n(&arr_ops_out, &block->outputs[i], 4);
 
-- 
2.30.2