freedreno/ir3/ra: split-up
authorRob Clark <robdclark@chromium.org>
Sat, 21 Mar 2020 17:33:48 +0000 (10:33 -0700)
committerMarge Bot <eric+marge@anholt.net>
Fri, 27 Mar 2020 22:41:36 +0000 (22:41 +0000)
Split out regset and shared header, since the RA pass is already getting
large-ish.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4272>

src/freedreno/Makefile.sources
src/freedreno/ir3/ir3_ra.c
src/freedreno/ir3/ir3_ra.h [new file with mode: 0644]
src/freedreno/ir3/ir3_ra_regset.c [new file with mode: 0644]
src/freedreno/ir3/meson.build

index fd78c169916eccd71138dbc6450e0b0690c590e0..07a59c97764b6a17bca13c91bdca9ee8bf9de111 100644 (file)
@@ -49,6 +49,8 @@ ir3_SOURCES := \
        ir3/ir3_postsched.c \
        ir3/ir3_print.c \
        ir3/ir3_ra.c \
+       ir3/ir3_ra.h \
+       ir3/ir3_ra_regset.c \
        ir3/ir3_sched.c \
        ir3/ir3_shader.c \
        ir3/ir3_shader.h \
index 215ce251f418400a4fc4c4e3dc65a16f0015b6e6..6398cc381b246840878dfa354f33b9835fa57af2 100644 (file)
@@ -31,6 +31,7 @@
 
 #include "ir3.h"
 #include "ir3_compiler.h"
+#include "ir3_ra.h"
 
 
 #ifdef DEBUG
  * the result.
  */
 
-static const unsigned class_sizes[] = {
-       1, 2, 3, 4,
-       4 + 4, /* txd + 1d/2d */
-       4 + 6, /* txd + 3d */
-};
-#define class_count ARRAY_SIZE(class_sizes)
-
-static const unsigned half_class_sizes[] = {
-       1, 2, 3, 4,
-};
-#define half_class_count  ARRAY_SIZE(half_class_sizes)
-
-/* seems to just be used for compute shaders?  Seems like vec1 and vec3
- * are sufficient (for now?)
- */
-static const unsigned high_class_sizes[] = {
-       1, 3,
-};
-#define high_class_count ARRAY_SIZE(high_class_sizes)
-
-#define total_class_count (class_count + half_class_count + high_class_count)
-
-/* Below a0.x are normal regs.  RA doesn't need to assign a0.x/p0.x. */
-#define NUM_REGS             (4 * 48)  /* r0 to r47 */
-#define NUM_HIGH_REGS        (4 * 8)   /* r48 to r55 */
-#define FIRST_HIGH_REG       (4 * 48)
-/* Number of virtual regs in a given class: */
-#define CLASS_REGS(i)        (NUM_REGS - (class_sizes[i] - 1))
-#define HALF_CLASS_REGS(i)   (NUM_REGS - (half_class_sizes[i] - 1))
-#define HIGH_CLASS_REGS(i)   (NUM_HIGH_REGS - (high_class_sizes[i] - 1))
-
-#define HALF_OFFSET          (class_count)
-#define HIGH_OFFSET          (class_count + half_class_count)
-
-/* register-set, created one time, used for all shaders: */
-struct ir3_ra_reg_set {
-       struct ra_regs *regs;
-       unsigned int classes[class_count];
-       unsigned int half_classes[half_class_count];
-       unsigned int high_classes[high_class_count];
-       /* maps flat virtual register space to base gpr: */
-       uint16_t *ra_reg_to_gpr;
-       /* maps cls,gpr to flat virtual register space: */
-       uint16_t **gpr_to_ra_reg;
-};
-
-static void
-build_q_values(unsigned int **q_values, unsigned off,
-               const unsigned *sizes, unsigned count)
-{
-       for (unsigned i = 0; i < count; i++) {
-               q_values[i + off] = rzalloc_array(q_values, unsigned, total_class_count);
-
-               /* From register_allocate.c:
-                *
-                * q(B,C) (indexed by C, B is this register class) in
-                * Runeson/Nyström paper.  This is "how many registers of B could
-                * the worst choice register from C conflict with".
-                *
-                * If we just let the register allocation algorithm compute these
-                * values, is extremely expensive.  However, since all of our
-                * registers are laid out, we can very easily compute them
-                * ourselves.  View the register from C as fixed starting at GRF n
-                * somewhere in the middle, and the register from B as sliding back
-                * and forth.  Then the first register to conflict from B is the
-                * one starting at n - class_size[B] + 1 and the last register to
-                * conflict will start at n + class_size[B] - 1.  Therefore, the
-                * number of conflicts from B is class_size[B] + class_size[C] - 1.
-                *
-                *   +-+-+-+-+-+-+     +-+-+-+-+-+-+
-                * B | | | | | |n| --> | | | | | | |
-                *   +-+-+-+-+-+-+     +-+-+-+-+-+-+
-                *             +-+-+-+-+-+
-                * C           |n| | | | |
-                *             +-+-+-+-+-+
-                *
-                * (Idea copied from brw_fs_reg_allocate.cpp)
-                */
-               for (unsigned j = 0; j < count; j++)
-                       q_values[i + off][j + off] = sizes[i] + sizes[j] - 1;
-       }
-}
-
-/* One-time setup of RA register-set, which describes all the possible
- * "virtual" registers and their interferences.  Ie. double register
- * occupies (and conflicts with) two single registers, and so forth.
- * Since registers do not need to be aligned to their class size, they
- * can conflict with other registers in the same class too.  Ie:
- *
- *    Single (base) |  Double
- *    --------------+---------------
- *       R0         |  D0
- *       R1         |  D0 D1
- *       R2         |     D1 D2
- *       R3         |        D2
- *           .. and so on..
- *
- * (NOTE the disassembler uses notation like r0.x/y/z/w but those are
- * really just four scalar registers.  Don't let that confuse you.)
- */
-struct ir3_ra_reg_set *
-ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
-{
-       struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set);
-       unsigned ra_reg_count, reg, first_half_reg, first_high_reg, base;
-       unsigned int **q_values;
-
-       /* calculate # of regs across all classes: */
-       ra_reg_count = 0;
-       for (unsigned i = 0; i < class_count; i++)
-               ra_reg_count += CLASS_REGS(i);
-       for (unsigned i = 0; i < half_class_count; i++)
-               ra_reg_count += HALF_CLASS_REGS(i);
-       for (unsigned i = 0; i < high_class_count; i++)
-               ra_reg_count += HIGH_CLASS_REGS(i);
-
-       /* allocate and populate q_values: */
-       q_values = ralloc_array(set, unsigned *, total_class_count);
-
-       build_q_values(q_values, 0, class_sizes, class_count);
-       build_q_values(q_values, HALF_OFFSET, half_class_sizes, half_class_count);
-       build_q_values(q_values, HIGH_OFFSET, high_class_sizes, high_class_count);
-
-       /* allocate the reg-set.. */
-       set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
-       set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
-       set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count);
-
-       /* .. and classes */
-       reg = 0;
-       for (unsigned i = 0; i < class_count; i++) {
-               set->classes[i] = ra_alloc_reg_class(set->regs);
-
-               set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i));
-
-               for (unsigned j = 0; j < CLASS_REGS(i); j++) {
-                       ra_class_add_reg(set->regs, set->classes[i], reg);
-
-                       set->ra_reg_to_gpr[reg] = j;
-                       set->gpr_to_ra_reg[i][j] = reg;
-
-                       for (unsigned br = j; br < j + class_sizes[i]; br++)
-                               ra_add_transitive_reg_conflict(set->regs, br, reg);
-
-                       reg++;
-               }
-       }
-
-       first_half_reg = reg;
-       base = HALF_OFFSET;
-
-       for (unsigned i = 0; i < half_class_count; i++) {
-               set->half_classes[i] = ra_alloc_reg_class(set->regs);
-
-               set->gpr_to_ra_reg[base + i] =
-                               ralloc_array(set, uint16_t, HALF_CLASS_REGS(i));
-
-               for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
-                       ra_class_add_reg(set->regs, set->half_classes[i], reg);
-
-                       set->ra_reg_to_gpr[reg] = j;
-                       set->gpr_to_ra_reg[base + i][j] = reg;
-
-                       for (unsigned br = j; br < j + half_class_sizes[i]; br++)
-                               ra_add_transitive_reg_conflict(set->regs, br + first_half_reg, reg);
-
-                       reg++;
-               }
-       }
-
-       first_high_reg = reg;
-       base = HIGH_OFFSET;
-
-       for (unsigned i = 0; i < high_class_count; i++) {
-               set->high_classes[i] = ra_alloc_reg_class(set->regs);
-
-               set->gpr_to_ra_reg[base + i] =
-                               ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i));
-
-               for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
-                       ra_class_add_reg(set->regs, set->high_classes[i], reg);
-
-                       set->ra_reg_to_gpr[reg] = j;
-                       set->gpr_to_ra_reg[base + i][j] = reg;
-
-                       for (unsigned br = j; br < j + high_class_sizes[i]; br++)
-                               ra_add_transitive_reg_conflict(set->regs, br + first_high_reg, reg);
-
-                       reg++;
-               }
-       }
-
-       /* starting a6xx, half precision regs conflict w/ full precision regs: */
-       if (compiler->gpu_id >= 600) {
-               /* because of transitivity, we can get away with just setting up
-                * conflicts between the first class of full and half regs:
-                */
-               for (unsigned i = 0; i < half_class_count; i++) {
-                       /* NOTE there are fewer half class sizes, but they match the
-                        * first N full class sizes.. but assert in case that ever
-                        * accidentally changes:
-                        */
-                       debug_assert(class_sizes[i] == half_class_sizes[i]);
-                       for (unsigned j = 0; j < CLASS_REGS(i) / 2; j++) {
-                               unsigned freg  = set->gpr_to_ra_reg[i][j];
-                               unsigned hreg0 = set->gpr_to_ra_reg[i + HALF_OFFSET][(j * 2) + 0];
-                               unsigned hreg1 = set->gpr_to_ra_reg[i + HALF_OFFSET][(j * 2) + 1];
-
-                               ra_add_transitive_reg_pair_conflict(set->regs, freg, hreg0, hreg1);
-                       }
-               }
-
-               // TODO also need to update q_values, but for now:
-               ra_set_finalize(set->regs, NULL);
-       } else {
-               ra_set_finalize(set->regs, q_values);
-       }
-
-       ralloc_free(q_values);
-
-       return set;
-}
-
-/* additional block-data (per-block) */
-struct ir3_ra_block_data {
-       BITSET_WORD *def;        /* variables defined before used in block */
-       BITSET_WORD *use;        /* variables used before defined in block */
-       BITSET_WORD *livein;     /* which defs reach entry point of block */
-       BITSET_WORD *liveout;    /* which defs reach exit point of block */
-};
-
-/* additional instruction-data (per-instruction) */
-struct ir3_ra_instr_data {
-       /* cached instruction 'definer' info: */
-       struct ir3_instruction *defn;
-       int off, sz, cls;
-};
-
-/* register-assign context, per-shader */
-struct ir3_ra_ctx {
-       struct ir3_shader_variant *v;
-       struct ir3 *ir;
-
-       struct ir3_ra_reg_set *set;
-       struct ra_graph *g;
-
-       /* Are we in the scalar assignment pass?  In this pass, all larger-
-        * than-vec1 vales have already been assigned and pre-colored, so
-        * we only consider scalar values.
-        */
-       bool scalar_pass;
-
-       unsigned alloc_count;
-       /* one per class, plus one slot for arrays: */
-       unsigned class_alloc_count[total_class_count + 1];
-       unsigned class_base[total_class_count + 1];
-       unsigned instr_cnt;
-       unsigned *def, *use;     /* def/use table */
-       struct ir3_ra_instr_data *instrd;
-
-       /* Mapping vreg name back to instruction, used select reg callback: */
-       struct hash_table *name_to_instr;
-
-       /* Tracking for max half/full register assigned.  We don't need to
-        * track high registers.
-        *
-        * The feedback about registers used in first pass is used to choose
-        * a target register usage to round-robin between in the 2nd pass.
-        */
-       unsigned max_assigned;
-       unsigned max_half_assigned;
-
-       /* Tracking for select_reg callback */
-       unsigned start_search_reg;
-       unsigned max_target;
-};
-
-static int scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n);
 
 /* does it conflict? */
 static inline bool
@@ -396,40 +119,6 @@ reg_size_for_array(struct ir3_array *arr)
        return arr->length;
 }
 
-static int
-size_to_class(unsigned sz, bool half, bool high)
-{
-       if (high) {
-               for (unsigned i = 0; i < high_class_count; i++)
-                       if (high_class_sizes[i] >= sz)
-                               return i + HIGH_OFFSET;
-       } else if (half) {
-               for (unsigned i = 0; i < half_class_count; i++)
-                       if (half_class_sizes[i] >= sz)
-                               return i + HALF_OFFSET;
-       } else {
-               for (unsigned i = 0; i < class_count; i++)
-                       if (class_sizes[i] >= sz)
-                               return i;
-       }
-       debug_assert(0);
-       return -1;
-}
-
-static bool
-writes_gpr(struct ir3_instruction *instr)
-{
-       if (dest_regs(instr) == 0)
-               return false;
-       /* is dest a normal temp register: */
-       struct ir3_register *reg = instr->regs[0];
-       debug_assert(!(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)));
-       if ((reg->num == regid(REG_A0, 0)) ||
-                       (reg->num == regid(REG_P0, 0)))
-               return false;
-       return true;
-}
-
 static bool
 instr_before(struct ir3_instruction *a, struct ir3_instruction *b)
 {
@@ -577,7 +266,7 @@ ra_block_find_definers(struct ir3_ra_ctx *ctx, struct ir3_block *block)
                } else {
                        /* and the normal case: */
                        id->defn = get_definer(ctx, instr, &id->sz, &id->off);
-                       id->cls = size_to_class(id->sz, is_half(id->defn), is_high(id->defn));
+                       id->cls = ra_size_to_class(id->sz, is_half(id->defn), is_high(id->defn));
 
                        /* this is a bit of duct-tape.. if we have a scenario like:
                         *
@@ -809,45 +498,6 @@ ra_init(struct ir3_ra_ctx *ctx)
        }
 }
 
-static unsigned
-__ra_name(struct ir3_ra_ctx *ctx, int cls, struct ir3_instruction *defn)
-{
-       unsigned name;
-       debug_assert(cls >= 0);
-       debug_assert(cls < total_class_count);  /* we shouldn't get arrays here.. */
-       name = ctx->class_base[cls] + defn->name;
-       debug_assert(name < ctx->alloc_count);
-       return name;
-}
-
-static int
-ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id)
-{
-       /* TODO handle name mapping for arrays */
-       return __ra_name(ctx, id->cls, id->defn);
-}
-
-/* Get the scalar name of the n'th component of an instruction dst: */
-static int
-scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n)
-{
-       if (ctx->scalar_pass) {
-               if (instr->opc == OPC_META_SPLIT) {
-                       debug_assert(n == 0);     /* split results in a scalar */
-                       struct ir3_instruction *src = instr->regs[1]->instr;
-                       return scalar_name(ctx, src, instr->split.off);
-               } else if (instr->opc == OPC_META_COLLECT) {
-                       debug_assert(n < (instr->regs_count + 1));
-                       struct ir3_instruction *src = instr->regs[n + 1]->instr;
-                       return scalar_name(ctx, src, 0);
-               }
-       } else {
-               debug_assert(n == 0);
-       }
-
-       return ra_name(ctx, &ctx->instrd[instr->ip]) + n;
-}
-
 static void
 ra_destroy(struct ir3_ra_ctx *ctx)
 {
diff --git a/src/freedreno/ir3/ir3_ra.h b/src/freedreno/ir3/ir3_ra.h
new file mode 100644 (file)
index 0000000..f9c2155
--- /dev/null
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef IR3_RA_H_
+#define IR3_RA_H_
+
+//#include "util/u_math.h"
+//#include "util/register_allocate.h"
+//#include "util/ralloc.h"
+#include "util/bitset.h"
+
+//#include "ir3.h"
+//#include "ir3_compiler.h"
+
+
+static const unsigned class_sizes[] = {        /* full precision classes */
+       1, 2, 3, 4,
+       4 + 4, /* txd + 1d/2d */
+       4 + 6, /* txd + 3d */
+};
+#define class_count ARRAY_SIZE(class_sizes)
+
+static const unsigned half_class_sizes[] = {   /* half precision classes */
+       1, 2, 3, 4,
+};
+#define half_class_count  ARRAY_SIZE(half_class_sizes)
+
+/* High register classes.  These seem to be used only for compute shaders,
+ * and vec1 and vec3 seem to be sufficient (for now?)
+ */
+static const unsigned high_class_sizes[] = {
+       1, 3,
+};
+#define high_class_count ARRAY_SIZE(high_class_sizes)
+
+#define total_class_count (class_count + half_class_count + high_class_count)
+
+/* Below a0.x are normal regs.  RA doesn't need to assign a0.x/p0.x. */
+#define NUM_REGS             (4 * 48)  /* r0 to r47 */
+#define NUM_HIGH_REGS        (4 * 8)   /* r48 to r55 */
+#define FIRST_HIGH_REG       (4 * 48)  /* same value as NUM_REGS */
+/* Number of virtual regs in a given class, ie. the number of valid base regs: */
+#define CLASS_REGS(i)        (NUM_REGS - (class_sizes[i] - 1))
+#define HALF_CLASS_REGS(i)   (NUM_REGS - (half_class_sizes[i] - 1))
+#define HIGH_CLASS_REGS(i)   (NUM_HIGH_REGS - (high_class_sizes[i] - 1))
+
+#define HALF_OFFSET          (class_count)                     /* class index of first half class */
+#define HIGH_OFFSET          (class_count + half_class_count)  /* class index of first high class */
+
+/* register-set, created one time, used for all shaders: */
+struct ir3_ra_reg_set {
+       struct ra_regs *regs;
+       unsigned int classes[class_count];            /* ids from ra_alloc_reg_class() */
+       unsigned int half_classes[half_class_count];  /* likewise, half precision */
+       unsigned int high_classes[high_class_count];  /* likewise, high regs */
+       /* maps flat virtual register space to base gpr: */
+       uint16_t *ra_reg_to_gpr;
+       /* maps cls,gpr to flat virtual register space (cls in 0..total_class_count-1): */
+       uint16_t **gpr_to_ra_reg;
+};
+
+/* additional RA data kept per-block (def/use and live-in/live-out bitsets) */
+struct ir3_ra_block_data {
+       BITSET_WORD *def;        /* variables defined before used in block */
+       BITSET_WORD *use;        /* variables used before defined in block */
+       BITSET_WORD *livein;     /* which defs reach entry point of block */
+       BITSET_WORD *liveout;    /* which defs reach exit point of block */
+};
+
+/* additional RA data kept per-instruction */
+struct ir3_ra_instr_data {
+       /* cached instruction 'definer' info: */
+       struct ir3_instruction *defn;
+       int off, sz, cls;        /* off/sz from get_definer(), cls from ra_size_to_class() */
+};
+
+/* register-assign context, per-shader */
+struct ir3_ra_ctx {
+       struct ir3_shader_variant *v;
+       struct ir3 *ir;
+
+       struct ir3_ra_reg_set *set;
+       struct ra_graph *g;
+
+       /* Are we in the scalar assignment pass?  In this pass, all larger-
+        * than-vec1 values have already been assigned and pre-colored, so
+        * we only consider scalar values.
+        */
+       bool scalar_pass;
+
+       unsigned alloc_count;    /* upper bound (exclusive) on names from ra_name() */
+       /* one per class, plus one slot for arrays: */
+       unsigned class_alloc_count[total_class_count + 1];
+       unsigned class_base[total_class_count + 1];   /* first name of each class */
+       unsigned instr_cnt;
+       unsigned *def, *use;     /* def/use table */
+       struct ir3_ra_instr_data *instrd;             /* indexed by instr->ip */
+
+       /* Mapping vreg name back to instruction, used by the select_reg callback: */
+       struct hash_table *name_to_instr;
+
+       /* Tracking for max half/full register assigned.  We don't need to
+        * track high registers.
+        *
+        * The feedback about registers used in the first pass is used to
+        * choose a target register usage to round-robin between in the 2nd pass.
+        */
+       unsigned max_assigned;
+       unsigned max_half_assigned;
+
+       /* Tracking for select_reg callback */
+       unsigned start_search_reg;
+       unsigned max_target;
+};
+
+static inline int
+ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id)
+{
+       /* we shouldn't get arrays here, only values with a regular class: */
+       debug_assert(id->cls >= 0);
+       debug_assert(id->cls < total_class_count);
+       /* the name is the definer's name, offset by the base of its class: */
+       unsigned name = ctx->class_base[id->cls] + id->defn->name;
+       debug_assert(name < ctx->alloc_count);
+       return name;
+}
+
+/* Get the scalar name of the n'th component of an instruction dst.  In the
+ * scalar pass, split/collect are traced back to the src providing the value:
+ */
+static inline int
+scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n)
+{
+       if (ctx->scalar_pass) {
+               if (instr->opc == OPC_META_SPLIT) {
+                       debug_assert(n == 0);     /* split results in a scalar */
+                       return scalar_name(ctx, instr->regs[1]->instr, instr->split.off);
+               } else if (instr->opc == OPC_META_COLLECT) {
+                       debug_assert((n + 1) < instr->regs_count);  /* src n is regs[n + 1] */
+                       return scalar_name(ctx, instr->regs[n + 1]->instr, 0);
+               }
+       } else {
+               debug_assert(n == 0);
+       }
+
+       return ra_name(ctx, &ctx->instrd[instr->ip]) + n;
+}
+
+/* Does the instruction write a normal temp register (as opposed to a0.x/p0.x): */
+static inline bool
+writes_gpr(struct ir3_instruction *instr)
+{
+       if (dest_regs(instr) == 0)
+               return false;
+
+       struct ir3_register *reg = instr->regs[0];
+       debug_assert(!(reg->flags & (IR3_REG_CONST | IR3_REG_IMMED)));
+
+       /* a0.x and p0.x are not normal regs, anything else counts as a gpr: */
+       return (reg->num != regid(REG_A0, 0)) && (reg->num != regid(REG_P0, 0));
+}
+
+int ra_size_to_class(unsigned sz, bool half, bool high);
+
+#endif  /* IR3_RA_H_ */
diff --git a/src/freedreno/ir3/ir3_ra_regset.c b/src/freedreno/ir3/ir3_ra_regset.c
new file mode 100644 (file)
index 0000000..d0e77bc
--- /dev/null
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "util/u_math.h"
+#include "util/register_allocate.h"
+#include "util/ralloc.h"
+#include "util/bitset.h"
+
+#include "ir3.h"
+#include "ir3_compiler.h"
+#include "ir3_ra.h"
+
+static void
+build_q_values(unsigned int **q_values, unsigned off,
+               const unsigned *sizes, unsigned count)
+{
+       for (unsigned i = 0; i < count; i++) {
+               q_values[i + off] = rzalloc_array(q_values, unsigned, total_class_count);
+
+               /* From register_allocate.c:
+                *
+                * q(B,C) (indexed by C, B is this register class) in
+                * Runeson/Nyström paper.  This is "how many registers of B could
+                * the worst choice register from C conflict with".
+                *
+                * If we just let the register allocation algorithm compute these
+                * values, it is extremely expensive.  However, since all of our
+                * registers are laid out, we can very easily compute them
+                * ourselves.  View the register from C as fixed starting at GRF n
+                * somewhere in the middle, and the register from B as sliding back
+                * and forth.  Then the first register to conflict from B is the
+                * one starting at n - class_size[B] + 1 and the last register to
+                * conflict will start at n + class_size[C] - 1.  Therefore, the
+                * number of conflicts from B is class_size[B] + class_size[C] - 1.
+                *
+                *   +-+-+-+-+-+-+     +-+-+-+-+-+-+
+                * B | | | | | |n| --> | | | | | | |
+                *   +-+-+-+-+-+-+     +-+-+-+-+-+-+
+                *             +-+-+-+-+-+
+                * C           |n| | | | |
+                *             +-+-+-+-+-+
+                *
+                * (Idea copied from brw_fs_reg_allocate.cpp)
+                */
+               for (unsigned j = 0; j < count; j++)
+                       q_values[i + off][j + off] = sizes[i] + sizes[j] - 1;
+       }
+}
+
+/* One-time setup of RA register-set, which describes all the possible
+ * "virtual" registers and their interferences.  I.e. a double register
+ * occupies (and conflicts with) two single registers, and so forth.
+ * Since registers do not need to be aligned to their class size, they
+ * can conflict with other registers in the same class too.  I.e.:
+ *
+ *    Single (base) |  Double
+ *    --------------+---------------
+ *       R0         |  D0
+ *       R1         |  D0 D1
+ *       R2         |     D1 D2
+ *       R3         |        D2
+ *           .. and so on..
+ *
+ * (NOTE the disassembler uses notation like r0.x/y/z/w but those are
+ * really just four scalar registers.  Don't let that confuse you.)
+ */
+struct ir3_ra_reg_set *
+ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
+{
+       struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set);
+       unsigned ra_reg_count, reg, first_half_reg, first_high_reg, base;
+       unsigned int **q_values;
+
+       /* calculate # of regs across all classes: */
+       ra_reg_count = 0;
+       for (unsigned i = 0; i < class_count; i++)
+               ra_reg_count += CLASS_REGS(i);
+       for (unsigned i = 0; i < half_class_count; i++)
+               ra_reg_count += HALF_CLASS_REGS(i);
+       for (unsigned i = 0; i < high_class_count; i++)
+               ra_reg_count += HIGH_CLASS_REGS(i);
+
+       /* allocate and populate q_values (one group at a time: full, half, high): */
+       q_values = ralloc_array(set, unsigned *, total_class_count);
+
+       build_q_values(q_values, 0, class_sizes, class_count);
+       build_q_values(q_values, HALF_OFFSET, half_class_sizes, half_class_count);
+       build_q_values(q_values, HIGH_OFFSET, high_class_sizes, high_class_count);
+
+       /* allocate the reg-set.. */
+       set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
+       set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
+       set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count);
+
+       /* .. and classes (reg is the running index into the flat reg space) */
+       reg = 0;
+       for (unsigned i = 0; i < class_count; i++) {
+               set->classes[i] = ra_alloc_reg_class(set->regs);
+
+               set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i));
+
+               for (unsigned j = 0; j < CLASS_REGS(i); j++) {
+                       ra_class_add_reg(set->regs, set->classes[i], reg);
+
+                       set->ra_reg_to_gpr[reg] = j;
+                       set->gpr_to_ra_reg[i][j] = reg;
+
+                       for (unsigned br = j; br < j + class_sizes[i]; br++)
+                               ra_add_transitive_reg_conflict(set->regs, br, reg);
+
+                       reg++;
+               }
+       }
+
+       first_half_reg = reg;    /* flat index of the first (size 1) half reg */
+       base = HALF_OFFSET;
+
+       for (unsigned i = 0; i < half_class_count; i++) {
+               set->half_classes[i] = ra_alloc_reg_class(set->regs);
+
+               set->gpr_to_ra_reg[base + i] =
+                               ralloc_array(set, uint16_t, HALF_CLASS_REGS(i));
+
+               for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
+                       ra_class_add_reg(set->regs, set->half_classes[i], reg);
+
+                       set->ra_reg_to_gpr[reg] = j;
+                       set->gpr_to_ra_reg[base + i][j] = reg;
+
+                       for (unsigned br = j; br < j + half_class_sizes[i]; br++)
+                               ra_add_transitive_reg_conflict(set->regs, br + first_half_reg, reg);
+
+                       reg++;
+               }
+       }
+
+       first_high_reg = reg;    /* flat index of the first (size 1) high reg */
+       base = HIGH_OFFSET;
+
+       for (unsigned i = 0; i < high_class_count; i++) {
+               set->high_classes[i] = ra_alloc_reg_class(set->regs);
+
+               set->gpr_to_ra_reg[base + i] =
+                               ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i));
+
+               for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
+                       ra_class_add_reg(set->regs, set->high_classes[i], reg);
+
+                       set->ra_reg_to_gpr[reg] = j;
+                       set->gpr_to_ra_reg[base + i][j] = reg;
+
+                       for (unsigned br = j; br < j + high_class_sizes[i]; br++)
+                               ra_add_transitive_reg_conflict(set->regs, br + first_high_reg, reg);
+
+                       reg++;
+               }
+       }
+
+       /* starting a6xx, half precision regs conflict w/ full precision regs: */
+       if (compiler->gpu_id >= 600) {
+               /* because of transitivity, we can get away with just setting up
+                * conflicts between the first class of full and half regs:
+                */
+               for (unsigned i = 0; i < half_class_count; i++) {
+                       /* NOTE there are fewer half class sizes, but they match the
+                        * first N full class sizes.. but assert in case that ever
+                        * accidentally changes:
+                        */
+                       debug_assert(class_sizes[i] == half_class_sizes[i]);
+                       for (unsigned j = 0; j < CLASS_REGS(i) / 2; j++) {
+                               unsigned freg  = set->gpr_to_ra_reg[i][j];
+                               unsigned hreg0 = set->gpr_to_ra_reg[i + HALF_OFFSET][(j * 2) + 0];
+                               unsigned hreg1 = set->gpr_to_ra_reg[i + HALF_OFFSET][(j * 2) + 1];
+
+                               ra_add_transitive_reg_pair_conflict(set->regs, freg, hreg0, hreg1);
+                       }
+               }
+
+               // TODO q_values don't account for the half/full conflicts added above, so for now don't pass them:
+               ra_set_finalize(set->regs, NULL);
+       } else {
+               ra_set_finalize(set->regs, q_values);
+       }
+
+       ralloc_free(q_values);
+
+       return set;
+}
+
+/* Map a size to a register class index: the first class in the selected
+ * group (high, else half, else full) whose size is >= sz.  Asserts and
+ * returns -1 if no class in that group is large enough.
+ */
+int
+ra_size_to_class(unsigned sz, bool half, bool high)
+{
+       const unsigned *sizes = high ? high_class_sizes :
+                       half ? half_class_sizes : class_sizes;
+       unsigned count = high ? high_class_count :
+                       half ? half_class_count : class_count;
+       unsigned first = high ? HIGH_OFFSET : half ? HALF_OFFSET : 0;
+
+       for (unsigned i = 0; i < count; i++)
+               if (sizes[i] >= sz)
+                       return i + first;
+       debug_assert(0);
+       return -1;
+}
index 1e64fcda1f9e7dc08a780572e534cd06907d1326..2b5656faf808b6c34d16389e8ac7167e333687c9 100644 (file)
@@ -75,6 +75,8 @@ libfreedreno_ir3_files = files(
   'ir3_postsched.c',
   'ir3_print.c',
   'ir3_ra.c',
+  'ir3_ra.h',
+  'ir3_ra_regset.c',
   'ir3_sched.c',
   'ir3_shader.c',
   'ir3_shader.h',