fs_reg dst;
fs_reg src;
unsigned global_idx;
- uint8_t size_written;
- uint8_t size_read;
+ unsigned size_written;
+ unsigned size_read;
enum opcode opcode;
bool saturate;
};
{
public:
fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
- const fs_live_variables *live,
+ const fs_live_variables &live,
exec_list *out_acp[ACP_HASH_SIZE]);
void setup_initial_values();
void *mem_ctx;
cfg_t *cfg;
- const fs_live_variables *live;
+ const fs_live_variables &live;
acp_entry **acp;
int num_acp;
} /* anonymous namespace */
fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
- const fs_live_variables *live,
+ const fs_live_variables &live,
exec_list *out_acp[ACP_HASH_SIZE])
: mem_ctx(mem_ctx), cfg(cfg), live(live)
{
for (int i = 0; i < num_acp; i++) {
BITSET_SET(bd[block->num].undef, i);
for (unsigned off = 0; off < acp[i]->size_written; off += REG_SIZE) {
- if (BITSET_TEST(live->block_data[block->num].defout,
- live->var_from_reg(byte_offset(acp[i]->dst, off))))
+ if (BITSET_TEST(live.block_data[block->num].defout,
+ live.var_from_reg(byte_offset(acp[i]->dst, off))))
BITSET_CLEAR(bd[block->num].undef, i);
}
}
assert(entry->src.file == VGRF || entry->src.file == UNIFORM ||
entry->src.file == ATTR || entry->src.file == FIXED_GRF);
+ /* Avoid propagating a LOAD_PAYLOAD instruction into another if there is a
+ * good chance that we'll be able to eliminate the latter through register
+ * coalescing. If only part of the sources of the second LOAD_PAYLOAD can
+ * be simplified through copy propagation we would be making register
+ * coalescing impossible, ending up with unnecessary copies in the program.
+ * This is also the case for is_multi_copy_payload() copies that can only
+ * be coalesced when the instruction is lowered into a sequence of MOVs.
+ *
+ * Worse, in cases where the ACP entry was the result of CSE combining
+ * multiple LOAD_PAYLOAD subexpressions, propagating the first LOAD_PAYLOAD
+ * into the second would undo the work of CSE, leading to an infinite
+ * optimization loop. Avoid this by detecting LOAD_PAYLOAD copies from CSE
+ * temporaries, which should match is_coalescing_payload().
+ */
if (entry->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
- inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD)
+ (is_coalescing_payload(alloc, inst) || is_multi_copy_payload(inst)))
return false;
assert(entry->dst.file == VGRF);
if (i == 1) {
inst->src[i] = val;
progress = true;
- } else if (i == 0 && inst->src[1].file != IMM) {
+ } else if (i == 0 && inst->src[1].file != IMM &&
+ (inst->conditional_mod == BRW_CONDITIONAL_NONE ||
+ /* Only GE and L are commutative. */
+ inst->conditional_mod == BRW_CONDITIONAL_GE ||
+ inst->conditional_mod == BRW_CONDITIONAL_L)) {
inst->src[0] = inst->src[1];
inst->src[1] = val;
(inst->src[0].file == FIXED_GRF &&
inst->src[0].is_contiguous())) &&
inst->src[0].type == inst->dst.type &&
- !inst->is_partial_write());
+ !inst->is_partial_write()) ||
+ is_identity_payload(FIXED_GRF, inst);
}
/* Walks a basic block and does copy propagation on it using the acp
* operand of another instruction, add it to the ACP.
*/
if (can_propagate_from(inst)) {
- acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
+ acp_entry *entry = rzalloc(copy_prop_ctx, acp_entry);
entry->dst = inst->dst;
entry->src = inst->src[0];
entry->size_written = inst->size_written;
- entry->size_read = inst->size_read(0);
+ for (unsigned i = 0; i < inst->sources; i++)
+ entry->size_read += inst->size_read(i);
entry->opcode = inst->opcode;
entry->saturate = inst->saturate;
acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
for (int i = 0; i < cfg->num_blocks; i++)
out_acp[i] = new exec_list [ACP_HASH_SIZE];
- calculate_live_intervals();
+ const fs_live_variables &live = live_analysis.require();
/* First, walk through each block doing local copy propagation and getting
* the set of copies available at the end of the block.
for (unsigned a = 0; a < ACP_HASH_SIZE; a++) {
foreach_in_list_safe(acp_entry, entry, &out_acp[block->num][a]) {
assert(entry->dst.file == VGRF);
- if (block->start_ip <= virtual_grf_start[entry->dst.nr] &&
- virtual_grf_end[entry->dst.nr] <= block->end_ip)
+ if (block->start_ip <= live.vgrf_start[entry->dst.nr] &&
+ live.vgrf_end[entry->dst.nr] <= block->end_ip)
entry->remove();
}
}
}
/* Do dataflow analysis for those available copies. */
- fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, live_intervals, out_acp);
+ fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, live, out_acp);
/* Next, re-run local copy propagation, this time with the set of copies
* provided by the dataflow analysis available at the start of a block.
ralloc_free(copy_prop_ctx);
if (progress)
- invalidate_live_intervals();
+ invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
+ DEPENDENCY_INSTRUCTION_DETAIL);
return progress;
}