#define ACP_HASH_SIZE 16
+#include "main/bitset.h"
#include "brw_fs.h"
#include "brw_cfg.h"
* it lets us plug those into the local copy propagation on the second
* pass.
*/
- bool *livein;
+ BITSET_WORD *livein;
/**
* Which entries in the fs_copy_prop_dataflow acp table are live at the end
* of this block. This is done in initial setup from the per-block acps
* returned by the first local copy prop pass.
*/
- bool *liveout;
+ BITSET_WORD *liveout;
+
+ /**
+ * Which entries in the fs_copy_prop_dataflow acp table are generated by
+ * instructions in this block which reach the end of the block without
+ * being killed.
+ */
+ BITSET_WORD *copy;
/**
* Which entries in the fs_copy_prop_dataflow acp table are killed over the
* course of this block.
*/
- bool *kill;
+ BITSET_WORD *kill;
};
class fs_copy_prop_dataflow
{
public:
fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
- exec_list out_acp[][ACP_HASH_SIZE]);
+ exec_list *out_acp[ACP_HASH_SIZE]);
- void setup_kills();
+ void setup_initial_values();
void run();
+ void dump_block_data() const;
+
void *mem_ctx;
cfg_t *cfg;
acp_entry **acp;
int num_acp;
+ int bitset_words;
struct block_data *bd;
};
} /* anonymous namespace */
fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
- exec_list out_acp[][ACP_HASH_SIZE])
+ exec_list *out_acp[ACP_HASH_SIZE])
: mem_ctx(mem_ctx), cfg(cfg)
{
bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
acp = rzalloc_array(mem_ctx, struct acp_entry *, num_acp);
+ bitset_words = BITSET_WORDS(num_acp);
+
int next_acp = 0;
for (int b = 0; b < cfg->num_blocks; b++) {
- bd[b].livein = rzalloc_array(bd, bool, num_acp);
- bd[b].liveout = rzalloc_array(bd, bool, num_acp);
- bd[b].kill = rzalloc_array(bd, bool, num_acp);
+ bd[b].livein = rzalloc_array(bd, BITSET_WORD, bitset_words);
+ bd[b].liveout = rzalloc_array(bd, BITSET_WORD, bitset_words);
+ bd[b].copy = rzalloc_array(bd, BITSET_WORD, bitset_words);
+ bd[b].kill = rzalloc_array(bd, BITSET_WORD, bitset_words);
for (int i = 0; i < ACP_HASH_SIZE; i++) {
foreach_list(entry_node, &out_acp[b][i]) {
acp_entry *entry = (acp_entry *)entry_node;
acp[next_acp] = entry;
- bd[b].liveout[next_acp] = true;
+
+ /* opt_copy_propagate_local populates out_acp with copies created
+ * in a block which are still live at the end of the block. This
+ * is exactly what we want in the COPY set.
+ */
+ BITSET_SET(bd[b].copy, next_acp);
+
next_acp++;
}
}
assert(next_acp == num_acp);
- setup_kills();
+ setup_initial_values();
run();
}
/**
- * Walk the set of instructions in the block, marking which entries in the acp
- * are killed by the block.
+ * Set up initial values for each of the data flow sets, prior to running
+ * the fixed-point algorithm.
*/
void
-fs_copy_prop_dataflow::setup_kills()
+fs_copy_prop_dataflow::setup_initial_values()
{
+ /* Initialize the COPY and KILL sets. */
for (int b = 0; b < cfg->num_blocks; b++) {
bblock_t *block = cfg->blocks[b];
if (inst->dst.file != GRF)
continue;
+ /* Mark ACP entries which are killed by this instruction. */
for (int i = 0; i < num_acp; i++) {
if (inst->overwrites_reg(acp[i]->dst) ||
inst->overwrites_reg(acp[i]->src)) {
- bd[b].kill[i] = true;
+ BITSET_SET(bd[b].kill, i);
}
}
}
}
+
+ /* Populate the initial values for the livein and liveout sets. For the
+ * block at the start of the program, livein = 0 and liveout = copy.
+ * For the others, set liveout to 0 (the empty set) and livein to ~0
+ * (the universal set).
+ */
+ for (int b = 0; b < cfg->num_blocks; b++) {
+ bblock_t *block = cfg->blocks[b];
+ if (block->parents.is_empty()) {
+ for (int i = 0; i < bitset_words; i++) {
+ bd[b].livein[i] = 0u;
+ bd[b].liveout[i] = bd[b].copy[i];
+ }
+ } else {
+ for (int i = 0; i < bitset_words; i++) {
+ bd[b].liveout[i] = 0u;
+ bd[b].livein[i] = ~0u;
+ }
+ }
+ }
}
/**
void
fs_copy_prop_dataflow::run()
{
- bool cont = true;
+ bool progress;
- while (cont) {
- cont = false;
+ do {
+ progress = false;
+ /* Update liveout for all blocks. */
for (int b = 0; b < cfg->num_blocks; b++) {
- for (int i = 0; i < num_acp; i++) {
- if (!bd[b].liveout[i]) {
- /* Update liveout */
- if (bd[b].livein[i] && !bd[b].kill[i]) {
- bd[b].liveout[i] = true;
- cont = true;
- }
- }
+ if (cfg->blocks[b]->parents.is_empty())
+ continue;
- if (!bd[b].livein[i]) {
- /* Update livein: if it's live at the end of all parents, it's
- * live at our start.
- */
- bool add = true;
- foreach_list(block_node, &cfg->blocks[b]->parents) {
- bblock_link *link = (bblock_link *)block_node;
- bblock_t *block = link->block;
- if (!bd[block->block_num].liveout[i]) {
- add = false;
- break;
- }
- }
- if (add) {
- bd[b].livein[i] = true;
- cont = true;
- }
+ for (int i = 0; i < bitset_words; i++) {
+ const BITSET_WORD old_liveout = bd[b].liveout[i];
+
+ bd[b].liveout[i] =
+ bd[b].copy[i] | (bd[b].livein[i] & ~bd[b].kill[i]);
+
+ if (old_liveout != bd[b].liveout[i])
+ progress = true;
+ }
+ }
+
+ /* Update livein for all blocks. If a copy is live out of all parent
+ * blocks, it's live coming in to this block.
+ */
+ for (int b = 0; b < cfg->num_blocks; b++) {
+ if (cfg->blocks[b]->parents.is_empty())
+ continue;
+
+ for (int i = 0; i < bitset_words; i++) {
+ const BITSET_WORD old_livein = bd[b].livein[i];
+
+ bd[b].livein[i] = ~0u;
+ foreach_list(block_node, &cfg->blocks[b]->parents) {
+ bblock_link *link = (bblock_link *)block_node;
+ bblock_t *block = link->block;
+ bd[b].livein[i] &= bd[block->block_num].liveout[i];
}
+
+ if (old_livein != bd[b].livein[i])
+ progress = true;
}
}
+ } while (progress);
+}
+
+void
+fs_copy_prop_dataflow::dump_block_data() const
+{
+ for (int b = 0; b < cfg->num_blocks; b++) {
+ bblock_t *block = cfg->blocks[b];
+ printf("Block %d [%d, %d] (parents ", block->block_num,
+ block->start_ip, block->end_ip);
+ foreach_list(block_node, &block->parents) {
+ bblock_t *parent = ((bblock_link *) block_node)->block;
+ printf("%d ", parent->block_num);
+ }
+ printf("):\n");
+ printf(" livein = 0x");
+ for (int i = 0; i < bitset_words; i++)
+ printf("%08x", bd[b].livein[i]);
+ printf(", liveout = 0x");
+ for (int i = 0; i < bitset_words; i++)
+ printf("%08x", bd[b].liveout[i]);
+ printf(",\n copy = 0x");
+ for (int i = 0; i < bitset_words; i++)
+ printf("%08x", bd[b].copy[i]);
+ printf(", kill = 0x");
+ for (int i = 0; i < bitset_words; i++)
+ printf("%08x", bd[b].kill[i]);
+ printf("\n");
}
}
bool has_source_modifiers = entry->src.abs || entry->src.negate;
- if (intel->gen == 6 && inst->is_math() &&
- (has_source_modifiers || entry->src.file == UNIFORM ||
- entry->src.smear != -1))
+ if ((has_source_modifiers || entry->src.file == UNIFORM ||
+ entry->src.smear != -1) && !can_do_source_mods(inst))
+ return false;
+
+ if (has_source_modifiers && entry->dst.type != inst->src[arg].type)
return false;
inst->src[arg].file = entry->src.file;
inst->src[arg].reg = entry->src.reg;
inst->src[arg].reg_offset = entry->src.reg_offset;
- inst->src[arg].smear = entry->src.smear;
+ if (entry->src.smear != -1)
+ inst->src[arg].smear = entry->src.smear;
if (!inst->src[arg].abs) {
inst->src[arg].abs = entry->src.abs;
progress = true;
break;
+ case BRW_OPCODE_MACH:
case BRW_OPCODE_MUL:
case BRW_OPCODE_ADD:
if (i == 1) {
progress = true;
} else if (i == 0 && inst->src[1].file != IMM) {
/* Fit this constant in by commuting the operands.
- * Exception: we can't do this for 32-bit integer MUL
+ * Exception: we can't do this for 32-bit integer MUL/MACH
* because it's asymmetric.
*/
- if (inst->opcode == BRW_OPCODE_MUL &&
+ if ((inst->opcode == BRW_OPCODE_MUL ||
+ inst->opcode == BRW_OPCODE_MACH) &&
(inst->src[1].type == BRW_REGISTER_TYPE_D ||
inst->src[1].type == BRW_REGISTER_TYPE_UD))
break;
}
break;
- case FS_OPCODE_PULL_CONSTANT_LOAD:
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
inst->src[i] = entry->src;
progress = true;
break;
inst->src[0].file == IMM) &&
inst->src[0].type == inst->dst.type &&
!inst->saturate &&
- !inst->predicate &&
- !inst->force_uncompressed &&
- !inst->force_sechalf) {
+ !inst->is_partial_write()) {
acp_entry *entry = ralloc(mem_ctx, acp_entry);
entry->dst = inst->dst;
entry->src = inst->src[0];
bool progress = false;
void *mem_ctx = ralloc_context(this->mem_ctx);
cfg_t cfg(this);
- exec_list out_acp[cfg.num_blocks][ACP_HASH_SIZE];
+ exec_list *out_acp[cfg.num_blocks];
+ for (int i = 0; i < cfg.num_blocks; i++)
+ out_acp[i] = new exec_list [ACP_HASH_SIZE];
/* First, walk through each block doing local copy propagation and getting
* the set of copies available at the end of the block.
exec_list in_acp[ACP_HASH_SIZE];
for (int i = 0; i < dataflow.num_acp; i++) {
- if (dataflow.bd[b].livein[i]) {
+ if (BITSET_TEST(dataflow.bd[b].livein, i)) {
struct acp_entry *entry = dataflow.acp[i];
in_acp[entry->dst.reg % ACP_HASH_SIZE].push_tail(entry);
}
progress = opt_copy_propagate_local(mem_ctx, block, in_acp) || progress;
}
+ for (int i = 0; i < cfg.num_blocks; i++)
+ delete [] out_acp[i];
ralloc_free(mem_ctx);
if (progress)