* highest register that works.
*/
if (inst->eot) {
- int size = alloc.sizes[inst->src[0].nr];
+ const int vgrf = inst->opcode == SHADER_OPCODE_SEND ?
+ inst->src[2].nr : inst->src[0].nr;
+ int size = alloc.sizes[vgrf];
int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
/* If something happened to spill, we want to push the EOT send
*/
reg -= BRW_MAX_MRF(devinfo->gen) - first_used_mrf;
- ra_set_node_reg(g, inst->src[0].nr, reg);
+ ra_set_node_reg(g, vgrf, reg);
break;
}
}
* messages adding a node interference to the grf127_send_hack_node.
* This node has a fixed asignment to grf127.
*
- * We don't apply it to SIMD16 because previous code avoids any register
- * overlap between sources and destination.
+ * We don't apply it to SIMD16 instructions because previous code avoids
+ * any register overlap between sources and destination.
*/
ra_set_node_reg(g, grf127_send_hack_node, 127);
- if (dispatch_width == 8) {
- foreach_block_and_inst(block, fs_inst, inst, cfg) {
- if (inst->is_send_from_grf() && inst->dst.file == VGRF)
- ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node);
- }
+ foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ if (inst->exec_size < 16 && inst->is_send_from_grf() &&
+ inst->dst.file == VGRF)
+ ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node);
}
if (spilled_any_registers) {
}
}
+ /* From the Skylake PRM Vol. 2a docs for sends:
+ *
+ * "It is required that the second block of GRFs does not overlap with
+ * the first block."
+ *
+ * Normally, this is taken care of by fixup_sends_duplicate_payload() but
+ * in the case where one of the registers is an undefined value, the
+ * register allocator may decide that they don't interfere even though
+ * they're used as sources in the same instruction. We also need to add
+ * interference here.
+ */
+ if (devinfo->gen >= 9) {
+ foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
+ inst->src[2].file == VGRF &&
+ inst->src[3].file == VGRF &&
+ inst->src[2].nr != inst->src[3].nr) {
+ for (unsigned i = 0; i < inst->mlen; i++) {
+ for (unsigned j = 0; j < inst->ex_mlen; j++) {
+ ra_add_node_interference(g, inst->src[2].nr + i,
+ inst->src[3].nr + j);
+ }
+ }
+ }
+ }
+ }
+
/* Debug of register spilling: Go spill everything. */
if (unlikely(spill_all)) {
int reg = choose_spill_reg(g);
}
for (unsigned i = 0; i < this->alloc.count; i++) {
+ int live_length = virtual_grf_end[i] - virtual_grf_start[i];
+ if (live_length <= 0)
+ continue;
+
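+ /* Divide the cost (in number of spills/fills) by the log of the length
+ * of the live range of the register. This will encourage spill logic
+ * to spill long-living things before spilling short-lived things where
+ * spilling is less likely to actually do us any good. We use the log
+ * of the length because it will fall off very quickly and not cause us
+ * to spill medium length registers with more uses.
+ *
+ * NOTE(review): a live length of exactly 1 passes the check above but
+ * makes logf() return 0, so the division below yields an infinite (or,
+ * for a zero spill cost, NaN) adjusted cost. Confirm this is intended.
+ */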
+ float adjusted_cost = spill_costs[i] / logf(live_length);
if (!no_spill[i])
- ra_set_node_spill_cost(g, i, spill_costs[i]);
+ ra_set_node_spill_cost(g, i, adjusted_cost);
}
return ra_get_best_spill_node(g);
}
void
-fs_visitor::spill_reg(int spill_reg)
+fs_visitor::spill_reg(unsigned spill_reg)
{
int size = alloc.sizes[spill_reg];
unsigned int spill_offset = last_scratch;