-static const unsigned class_sizes[] = {
- 1, 2, 3, 4,
- 4 + 4, /* txd + 1d/2d */
- 4 + 6, /* txd + 3d */
-};
-#define class_count ARRAY_SIZE(class_sizes)
-
-static const unsigned half_class_sizes[] = {
- 1, 2, 3, 4,
-};
-#define half_class_count ARRAY_SIZE(half_class_sizes)
-
-/* seems to just be used for compute shaders? Seems like vec1 and vec3
- * are sufficient (for now?)
- */
-static const unsigned high_class_sizes[] = {
- 1, 3,
-};
-#define high_class_count ARRAY_SIZE(high_class_sizes)
-
-#define total_class_count (class_count + half_class_count + high_class_count)
-
-/* Below a0.x are normal regs. RA doesn't need to assign a0.x/p0.x. */
-#define NUM_REGS (4 * 48) /* r0 to r47 */
-#define NUM_HIGH_REGS (4 * 8) /* r48 to r55 */
-#define FIRST_HIGH_REG (4 * 48)
-/* Number of virtual regs in a given class: */
-#define CLASS_REGS(i) (NUM_REGS - (class_sizes[i] - 1))
-#define HALF_CLASS_REGS(i) (NUM_REGS - (half_class_sizes[i] - 1))
-#define HIGH_CLASS_REGS(i) (NUM_HIGH_REGS - (high_class_sizes[i] - 1))
-
-#define HALF_OFFSET (class_count)
-#define HIGH_OFFSET (class_count + half_class_count)
-
-/* register-set, created one time, used for all shaders: */
-struct ir3_ra_reg_set {
- struct ra_regs *regs;
- unsigned int classes[class_count];
- unsigned int half_classes[half_class_count];
- unsigned int high_classes[high_class_count];
- /* maps flat virtual register space to base gpr: */
- uint16_t *ra_reg_to_gpr;
- /* maps cls,gpr to flat virtual register space: */
- uint16_t **gpr_to_ra_reg;
-};
-
-static void
-build_q_values(unsigned int **q_values, unsigned off,
- const unsigned *sizes, unsigned count)
-{
- for (unsigned i = 0; i < count; i++) {
- q_values[i + off] = rzalloc_array(q_values, unsigned, total_class_count);
-
- /* From register_allocate.c:
- *
- * q(B,C) (indexed by C, B is this register class) in
- * Runeson/Nyström paper. This is "how many registers of B could
- * the worst choice register from C conflict with".
- *
- * If we just let the register allocation algorithm compute these
- * values, is extremely expensive. However, since all of our
- * registers are laid out, we can very easily compute them
- * ourselves. View the register from C as fixed starting at GRF n
- * somewhere in the middle, and the register from B as sliding back
- * and forth. Then the first register to conflict from B is the
- * one starting at n - class_size[B] + 1 and the last register to
- * conflict will start at n + class_size[B] - 1. Therefore, the
- * number of conflicts from B is class_size[B] + class_size[C] - 1.
- *
- * +-+-+-+-+-+-+ +-+-+-+-+-+-+
- * B | | | | | |n| --> | | | | | | |
- * +-+-+-+-+-+-+ +-+-+-+-+-+-+
- * +-+-+-+-+-+
- * C |n| | | | |
- * +-+-+-+-+-+
- *
- * (Idea copied from brw_fs_reg_allocate.cpp)
- */
- for (unsigned j = 0; j < count; j++)
- q_values[i + off][j + off] = sizes[i] + sizes[j] - 1;
- }
-}
-
-/* One-time setup of RA register-set, which describes all the possible
- * "virtual" registers and their interferences. Ie. double register
- * occupies (and conflicts with) two single registers, and so forth.
- * Since registers do not need to be aligned to their class size, they
- * can conflict with other registers in the same class too. Ie:
- *
- * Single (base) | Double
- * --------------+---------------
- * R0 | D0
- * R1 | D0 D1
- * R2 | D1 D2
- * R3 | D2
- * .. and so on..
- *
- * (NOTE the disassembler uses notation like r0.x/y/z/w but those are
- * really just four scalar registers. Don't let that confuse you.)
- */
-struct ir3_ra_reg_set *
-ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
-{
- struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set);
- unsigned ra_reg_count, reg, first_half_reg, first_high_reg, base;
- unsigned int **q_values;
-
- /* calculate # of regs across all classes: */
- ra_reg_count = 0;
- for (unsigned i = 0; i < class_count; i++)
- ra_reg_count += CLASS_REGS(i);
- for (unsigned i = 0; i < half_class_count; i++)
- ra_reg_count += HALF_CLASS_REGS(i);
- for (unsigned i = 0; i < high_class_count; i++)
- ra_reg_count += HIGH_CLASS_REGS(i);
-
- /* allocate and populate q_values: */
- q_values = ralloc_array(set, unsigned *, total_class_count);
-
- build_q_values(q_values, 0, class_sizes, class_count);
- build_q_values(q_values, HALF_OFFSET, half_class_sizes, half_class_count);
- build_q_values(q_values, HIGH_OFFSET, high_class_sizes, high_class_count);
-
- /* allocate the reg-set.. */
- set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
- set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
- set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count);
-
- /* .. and classes */
- reg = 0;
- for (unsigned i = 0; i < class_count; i++) {
- set->classes[i] = ra_alloc_reg_class(set->regs);
-
- set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i));
-
- for (unsigned j = 0; j < CLASS_REGS(i); j++) {
- ra_class_add_reg(set->regs, set->classes[i], reg);
-
- set->ra_reg_to_gpr[reg] = j;
- set->gpr_to_ra_reg[i][j] = reg;
-
- for (unsigned br = j; br < j + class_sizes[i]; br++)
- ra_add_transitive_reg_conflict(set->regs, br, reg);
-
- reg++;
- }
- }
-
- first_half_reg = reg;
- base = HALF_OFFSET;
-
- for (unsigned i = 0; i < half_class_count; i++) {
- set->half_classes[i] = ra_alloc_reg_class(set->regs);
-
- set->gpr_to_ra_reg[base + i] =
- ralloc_array(set, uint16_t, HALF_CLASS_REGS(i));
-
- for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
- ra_class_add_reg(set->regs, set->half_classes[i], reg);
-
- set->ra_reg_to_gpr[reg] = j;
- set->gpr_to_ra_reg[base + i][j] = reg;
-
- for (unsigned br = j; br < j + half_class_sizes[i]; br++)
- ra_add_transitive_reg_conflict(set->regs, br + first_half_reg, reg);
-
- reg++;
- }
- }
-
- first_high_reg = reg;
- base = HIGH_OFFSET;
-
- for (unsigned i = 0; i < high_class_count; i++) {
- set->high_classes[i] = ra_alloc_reg_class(set->regs);
-
- set->gpr_to_ra_reg[base + i] =
- ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i));
-
- for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
- ra_class_add_reg(set->regs, set->high_classes[i], reg);