Instead of adding transitive conflicts as we go, we now add regular
conflicts and then make them all transitive at the end. This reduces
screen creation time substantially on BDW. The time spent in eglInitialize
is reduced from 27.78 ms/call to 9.92 ms/call in debug mode and from 13.15
ms/call to 4.54 ms/call in release mode (about 65% in either case).
Reviewed-by: Eric Anholt <eric@anholt.net>
for (int base_reg = j;
base_reg < j + (class_sizes[i] + 1) / 2;
base_reg++) {
for (int base_reg = j;
base_reg < j + (class_sizes[i] + 1) / 2;
base_reg++) {
- ra_add_transitive_reg_conflict(regs, base_reg, reg);
+ ra_add_reg_conflict(regs, base_reg, reg);
for (int base_reg = j;
base_reg < j + class_sizes[i];
base_reg++) {
for (int base_reg = j;
base_reg < j + class_sizes[i];
base_reg++) {
- ra_add_transitive_reg_conflict(regs, base_reg, reg);
+ ra_add_reg_conflict(regs, base_reg, reg);
}
assert(reg == ra_reg_count);
}
assert(reg == ra_reg_count);
+ /* Applying transitivity to all of the base registers gives us the
+ * appropriate register conflict relationships everywhere.
+ */
+ for (int reg = 0; reg < base_reg_count; reg++)
+ ra_make_reg_conflicts_transitive(regs, reg);
+
/* Add a special class for aligned pairs, which we'll put delta_xy
* in on Gen <= 6 so that we can do PLN.
*/
/* Add a special class for aligned pairs, which we'll put delta_xy
* in on Gen <= 6 so that we can do PLN.
*/
for (int base_reg = j;
base_reg < j + class_sizes[i];
base_reg++) {
for (int base_reg = j;
base_reg < j + class_sizes[i];
base_reg++) {
- ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
+ ra_add_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg);
}
assert(reg == ra_reg_count);
}
assert(reg == ra_reg_count);
+ for (int reg = 0; reg < base_reg_count; reg++)
+ ra_make_reg_conflicts_transitive(compiler->vec4_reg_set.regs, reg);
+
ra_set_finalize(compiler->vec4_reg_set.regs, q_values);
for (int i = 0; i < MAX_VGRF_SIZE; i++)
ra_set_finalize(compiler->vec4_reg_set.regs, q_values);
for (int i = 0; i < MAX_VGRF_SIZE; i++)