#include "brw_fs.h"
#include "brw_fs_live_variables.h"
+static bool /* Reports whether inst is a MOV whose destination equals its first source. */
+is_nop_mov(const fs_inst *inst)
+{
+ if (inst->opcode == BRW_OPCODE_MOV) {
+ return inst->dst.equals(inst->src[0]); /* true only when dst.equals() accepts src[0] */
+ }
+
+ return false; /* every non-MOV opcode is reported as "not a no-op" */
+}
+
+static bool /* Filters instructions: returns true only for MOVs that pass every check below. */
+is_coalesce_candidate(const fs_inst *inst, const int *virtual_grf_sizes) /* virtual_grf_sizes is indexed by the .reg field of dst/src[0] */
+{
+ if (inst->opcode != BRW_OPCODE_MOV || /* must be a MOV ... */
+ inst->is_partial_write() || /* ... that is not a partial write ... */
+ inst->saturate || /* ... with no saturate flag ... */
+ inst->src[0].file != GRF || /* ... reading from the GRF file ... */
+ inst->src[0].negate || /* ... without a negate source modifier ... */
+ inst->src[0].abs || /* ... or an abs source modifier ... */
+ !inst->src[0].is_contiguous() || /* ... from a source that is_contiguous() accepts ... */
+ inst->dst.file != GRF || /* ... writing to the GRF file ... */
+ inst->dst.type != inst->src[0].type) { /* ... with matching dst and src types */
+ return false;
+ }
+
+ if (virtual_grf_sizes[inst->src[0].reg] > /* reject when the source register's size entry ... */
+ virtual_grf_sizes[inst->dst.reg]) /* ... exceeds the destination register's size entry */
+ return false;
+
+ return true;
+}
+
+static bool /* Decides whether var_from may be merged into var_to at the MOV `inst`. */
+can_coalesce_vars(brw::fs_live_variables *live_intervals,
+ const exec_list *instructions, const fs_inst *inst, int ip, /* NOTE(review): `instructions` is never referenced in this body */
+ int var_to, int var_from)
+{
+ if (!live_intervals->vars_interfere(var_from, var_to)) /* no interference reported: nothing further to check */
+ return true;
+
+ assert(ip >= live_intervals->start[var_to]); /* the scan below relies on ip not preceding var_to's recorded start */
+
+ fs_inst *scan_inst;
+ for (scan_inst = (fs_inst *)inst->next; /* walk forward starting at the instruction after inst */
+ !scan_inst->is_tail_sentinel() && ip <= live_intervals->end[var_to]; /* stop at the list tail or once ip passes var_to's recorded end */
+ scan_inst = (fs_inst *)scan_inst->next, ip++) { /* NOTE(review): ip is not advanced before the first iteration, so it trails scan_inst by one - confirm intended */
+ if (scan_inst->opcode == BRW_OPCODE_WHILE) /* any WHILE met during the scan rejects the merge */
+ return false;
+
+ if (scan_inst->dst.equals(inst->dst) || /* a later write to the MOV's destination ... */
+ scan_inst->dst.equals(inst->src[0])) /* ... or to the MOV's source rejects the merge */
+ return false;
+ }
+
+ return true; /* scan ended without hitting a WHILE or an overwrite */
+}
+
bool /* Returns `progress`; the visible `+` lines set it when a self-MOV is NOPed or a coalesce is committed. */
fs_visitor::register_coalesce() /* NOTE(review): interleaved +/- patch view with elided context; progress, channels_remaining and src_size are declared outside the visible lines */
{
int reg_from = -1, reg_to = -1;
int reg_to_offset[MAX_SAMPLER_MESSAGE_SIZE];
fs_inst *mov[MAX_SAMPLER_MESSAGE_SIZE];
+ int var_to[MAX_SAMPLER_MESSAGE_SIZE]; /* per-channel destination variable indices, filled in the check loop below */
+ int var_from[MAX_SAMPLER_MESSAGE_SIZE]; /* per-channel source variable indices, filled in the check loop below */
+ int ip = -1; /* bumped at the top of every iteration, so the first instruction gets 0 */
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
+ ip++;
- if (inst->opcode != BRW_OPCODE_MOV ||
- inst->is_partial_write() ||
- inst->saturate ||
- inst->src[0].file != GRF ||
- inst->src[0].negate ||
- inst->src[0].abs ||
- !inst->src[0].is_contiguous() ||
- inst->dst.file != GRF ||
- inst->dst.type != inst->src[0].type) {
- continue;
- }
-
- if (virtual_grf_sizes[inst->src[0].reg] >
- virtual_grf_sizes[inst->dst.reg])
+ if (!is_coalesce_candidate(inst, virtual_grf_sizes)) /* skip whatever the candidate filter rejects */
continue;
- int var_from = live_intervals->var_from_reg(&inst->src[0]);
- int var_to = live_intervals->var_from_reg(&inst->dst);
-
- if (live_intervals->vars_interfere(var_from, var_to) &&
- !inst->dst.equals(inst->src[0])) {
-
- /* We know that the live ranges of A (var_from) and B (var_to)
- * interfere because of the ->vars_interfere() call above. If the end
- * of B's live range is after the end of A's range, then we know two
- * things:
- * - the start of B's live range must be in A's live range (since we
- * already know the two ranges interfere, this is the only remaining
- * possibility)
- * - the interference isn't of the form we're looking for (where B is
- * entirely inside A)
- */
- if (live_intervals->end[var_to] > live_intervals->end[var_from])
- continue;
-
- bool overwritten = false;
- int scan_ip = -1;
-
- foreach_list(n, &this->instructions) {
- fs_inst *scan_inst = (fs_inst *)n;
- scan_ip++;
-
- if (scan_inst->is_control_flow()) {
- overwritten = true;
- break;
- }
-
- if (scan_ip <= live_intervals->start[var_to])
- continue;
-
- if (scan_ip > live_intervals->end[var_to])
- break;
-
- if (scan_inst->dst.equals(inst->dst) ||
- scan_inst->dst.equals(inst->src[0])) {
- overwritten = true;
- break;
- }
- }
-
- if (overwritten)
- continue;
+ if (is_nop_mov(inst)) { /* MOV onto itself: mark it NOP so the removal pass at the end deletes it */
+ inst->opcode = BRW_OPCODE_NOP;
+ progress = true;
+ continue;
}
if (reg_from != inst->src[0].reg) {
if (channels_remaining)
continue;
- bool removed = false;
+ bool can_coalesce = true; /* cleared as soon as one channel fails can_coalesce_vars() */
for (int i = 0; i < src_size; i++) {
- if (mov[i]) {
- removed = true;
+ var_to[i] = live_intervals->var_from_vgrf[reg_to] + reg_to_offset[i]; /* destination variable: reg_to's base index plus the recorded offset for channel i */
+ var_from[i] = live_intervals->var_from_vgrf[reg_from] + i; /* source variable: reg_from's base index plus i */
+
+ if (!can_coalesce_vars(live_intervals, &instructions, inst, ip,
+ var_to[i], var_from[i])) {
+ can_coalesce = false;
+ reg_from = -1; /* drop the tracked source register after a failed check */
+ break;
+ }
+ }
+ if (!can_coalesce)
+ continue;
+
+ progress = true; /* every channel passed, so the coalesce is committed from here on */
+
+ for (int i = 0; i < src_size; i++) {
+ if (mov[i]) { /* only non-null recorded MOVs are neutralised */
mov[i]->opcode = BRW_OPCODE_NOP;
mov[i]->conditional_mod = BRW_CONDITIONAL_NONE;
mov[i]->dst = reg_undef;
}
}
- if (removed) {
- live_intervals->start[var_to] = MIN2(live_intervals->start[var_to],
- live_intervals->start[var_from]);
- live_intervals->end[var_to] = MAX2(live_intervals->end[var_to],
- live_intervals->end[var_from]);
- reg_from = -1;
+ for (int i = 0; i < src_size; i++) { /* widen each var_to interval so it also covers the matching var_from interval */
+ live_intervals->start[var_to[i]] =
+ MIN2(live_intervals->start[var_to[i]],
+ live_intervals->start[var_from[i]]);
+ live_intervals->end[var_to[i]] =
+ MAX2(live_intervals->end[var_to[i]],
+ live_intervals->end[var_from[i]]);
}
+ reg_from = -1; /* reset the tracked source register once the merge is recorded */
}
- foreach_list_safe(node, &this->instructions) {
- fs_inst *inst = (fs_inst *)node;
+ if (progress) { /* NOP removal and interval invalidation run only when something changed */
+ foreach_list_safe(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
- if (inst->opcode == BRW_OPCODE_NOP) {
- inst->remove();
- progress = true;
+ if (inst->opcode == BRW_OPCODE_NOP) {
+ inst->remove();
+ }
}
- }
- if (progress)
invalidate_live_intervals();
+ }
return progress;
}