#include "ir3_ra.h"
static void
-build_q_values(unsigned int **q_values, unsigned off,
- const unsigned *sizes, unsigned count)
+setup_conflicts(struct ir3_ra_reg_set *set)
{
- for (unsigned i = 0; i < count; i++) {
- q_values[i + off] = rzalloc_array(q_values, unsigned, total_class_count);
-
- /* From register_allocate.c:
- *
- * q(B,C) (indexed by C, B is this register class) in
- * Runeson/Nyström paper. This is "how many registers of B could
- * the worst choice register from C conflict with".
- *
- * If we just let the register allocation algorithm compute these
- * values, is extremely expensive. However, since all of our
- * registers are laid out, we can very easily compute them
- * ourselves. View the register from C as fixed starting at GRF n
- * somewhere in the middle, and the register from B as sliding back
- * and forth. Then the first register to conflict from B is the
- * one starting at n - class_size[B] + 1 and the last register to
- * conflict will start at n + class_size[B] - 1. Therefore, the
- * number of conflicts from B is class_size[B] + class_size[C] - 1.
- *
- * +-+-+-+-+-+-+ +-+-+-+-+-+-+
- * B | | | | | |n| --> | | | | | | |
- * +-+-+-+-+-+-+ +-+-+-+-+-+-+
- * +-+-+-+-+-+
- * C |n| | | | |
- * +-+-+-+-+-+
- *
- * (Idea copied from brw_fs_reg_allocate.cpp)
- */
- for (unsigned j = 0; j < count; j++)
- q_values[i + off][j + off] = sizes[i] + sizes[j] - 1;
+ unsigned reg;
+
+ reg = 0;
+ for (unsigned i = 0; i < class_count; i++) {
+ for (unsigned j = 0; j < CLASS_REGS(i); j++) {
+ for (unsigned br = j; br < j + class_sizes[i]; br++) {
+ ra_add_transitive_reg_conflict(set->regs, br, reg);
+ }
+
+ reg++;
+ }
+ }
+
+ for (unsigned i = 0; i < half_class_count; i++) {
+ for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
+ for (unsigned br = j; br < j + half_class_sizes[i]; br++) {
+ ra_add_transitive_reg_conflict(set->regs,
+ br + set->first_half_reg, reg);
+ }
+
+ reg++;
+ }
+ }
+
+ for (unsigned i = 0; i < high_class_count; i++) {
+ for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
+ for (unsigned br = j; br < j + high_class_sizes[i]; br++) {
+ ra_add_transitive_reg_conflict(set->regs,
+ br + set->first_high_reg, reg);
+ }
+
+ reg++;
+ }
+ }
+
+ /*
+	 * Set up conflicts with registers over 0x3f for the special vreg
+	 * that exists to be used as interference for tex-prefetch:
+ */
+
+ for (unsigned i = 0x40; i < CLASS_REGS(0); i++) {
+ ra_add_transitive_reg_conflict(set->regs, i,
+ set->prefetch_exclude_reg);
+ }
+
+ for (unsigned i = 0x40; i < HALF_CLASS_REGS(0); i++) {
+ ra_add_transitive_reg_conflict(set->regs, i + set->first_half_reg,
+ set->prefetch_exclude_reg);
}
}
* really just four scalar registers. Don't let that confuse you.)
*/
struct ir3_ra_reg_set *
-ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
+ir3_ra_alloc_reg_set(struct ir3_compiler *compiler, bool mergedregs)
{
struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set);
- unsigned ra_reg_count, reg, first_half_reg, first_high_reg, base;
- unsigned int **q_values;
+ unsigned ra_reg_count, reg, base;
/* calculate # of regs across all classes: */
ra_reg_count = 0;
for (unsigned i = 0; i < high_class_count; i++)
ra_reg_count += HIGH_CLASS_REGS(i);
- /* allocate and populate q_values: */
- q_values = ralloc_array(set, unsigned *, total_class_count);
-
- build_q_values(q_values, 0, class_sizes, class_count);
- build_q_values(q_values, HALF_OFFSET, half_class_sizes, half_class_count);
- build_q_values(q_values, HIGH_OFFSET, high_class_sizes, high_class_count);
+ ra_reg_count += 1; /* for tex-prefetch excludes */
/* allocate the reg-set.. */
set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
set->ra_reg_to_gpr[reg] = j;
set->gpr_to_ra_reg[i][j] = reg;
- for (unsigned br = j; br < j + class_sizes[i]; br++)
- ra_add_transitive_reg_conflict(set->regs, br, reg);
-
reg++;
}
}
- first_half_reg = reg;
+ set->first_half_reg = reg;
base = HALF_OFFSET;
for (unsigned i = 0; i < half_class_count; i++) {
set->ra_reg_to_gpr[reg] = j;
set->gpr_to_ra_reg[base + i][j] = reg;
- for (unsigned br = j; br < j + half_class_sizes[i]; br++)
- ra_add_transitive_reg_conflict(set->regs, br + first_half_reg, reg);
-
reg++;
}
}
- first_high_reg = reg;
+ set->first_high_reg = reg;
base = HIGH_OFFSET;
for (unsigned i = 0; i < high_class_count; i++) {
set->ra_reg_to_gpr[reg] = j;
set->gpr_to_ra_reg[base + i][j] = reg;
- for (unsigned br = j; br < j + high_class_sizes[i]; br++)
- ra_add_transitive_reg_conflict(set->regs, br + first_high_reg, reg);
-
reg++;
}
}
- /* starting a6xx, half precision regs conflict w/ full precision regs: */
- if (compiler->gpu_id >= 600) {
- /* because of transitivity, we can get away with just setting up
- * conflicts between the first class of full and half regs:
- */
- for (unsigned i = 0; i < half_class_count; i++) {
- /* NOTE there are fewer half class sizes, but they match the
- * first N full class sizes.. but assert in case that ever
- * accidentally changes:
- */
- debug_assert(class_sizes[i] == half_class_sizes[i]);
- for (unsigned j = 0; j < CLASS_REGS(i) / 2; j++) {
- unsigned freg = set->gpr_to_ra_reg[i][j];
- unsigned hreg0 = set->gpr_to_ra_reg[i + HALF_OFFSET][(j * 2) + 0];
- unsigned hreg1 = set->gpr_to_ra_reg[i + HALF_OFFSET][(j * 2) + 1];
-
- ra_add_transitive_reg_pair_conflict(set->regs, freg, hreg0, hreg1);
- }
+ /*
+	 * Set up an additional class, with one vreg, to simply conflict
+	 * with registers that are too high to encode tex-prefetch.  This
+	 * vreg is only used to set up additional conflicts so that RA
+	 * knows to allocate prefetch dst regs below the limit:
+ */
+ set->prefetch_exclude_class = ra_alloc_reg_class(set->regs);
+ ra_class_add_reg(set->regs, set->prefetch_exclude_class, reg);
+ set->prefetch_exclude_reg = reg++;
+
+ /*
+	 * And finally set up conflicts.  Starting with a6xx, half precision
+	 * regs conflict w/ full precision regs (when using MERGEDREGS):
+ */
+ if (mergedregs) {
+ for (unsigned i = 0; i < CLASS_REGS(0) / 2; i++) {
+ unsigned freg = set->gpr_to_ra_reg[0][i];
+ unsigned hreg0 = set->gpr_to_ra_reg[0 + HALF_OFFSET][(i * 2) + 0];
+ unsigned hreg1 = set->gpr_to_ra_reg[0 + HALF_OFFSET][(i * 2) + 1];
+
+ ra_add_transitive_reg_pair_conflict(set->regs, freg, hreg0, hreg1);
}
-
- // TODO also need to update q_values, but for now:
- ra_set_finalize(set->regs, NULL);
- } else {
- ra_set_finalize(set->regs, q_values);
}
- ralloc_free(q_values);
+ setup_conflicts(set);
+
+ ra_set_finalize(set->regs, NULL);
return set;
}