freedreno/ir3: move mergedreg state out of reg
authorRob Clark <robdclark@chromium.org>
Sat, 13 Jun 2020 03:42:36 +0000 (20:42 -0700)
committerMarge Bot <eric+marge@anholt.net>
Thu, 18 Jun 2020 02:46:28 +0000 (02:46 +0000)
It is only needed in one place, so let's move it there.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5458>

src/freedreno/ir3/ir3.c
src/freedreno/ir3/ir3.h
src/freedreno/ir3/ir3_legalize.c

index 463c7664332edd786971b8ac64ef28e5500ce0f7..209dfb36b25dadeec83de1682d3f9d79b215c39b 100644 (file)
@@ -996,8 +996,6 @@ static struct ir3_register * reg_create(struct ir3 *shader,
        reg->wrmask = 1;
        reg->flags = flags;
        reg->num = num;
-       if (shader->compiler->gpu_id >= 600)
-               reg->merged = true;
        return reg;
 }
 
index b191c0af44c0517ef2cd09d33fe45a31731d5b13..d3e4a988dd92d5af342c649baa8a732ac0d77942 100644 (file)
@@ -121,9 +121,7 @@ struct ir3_register {
         * Note the size field isn't important for relative const (since
         * we don't have to do register allocation for constants).
         */
-       unsigned size : 15;
-
-       bool merged : 1;    /* half-regs conflict with full regs (ie >= a6xx) */
+       unsigned size : 16;
 
        /* normal registers:
         * the component is in the low two bits of the reg #, so
@@ -1763,13 +1761,14 @@ INSTR0(META_TEX_PREFETCH);
 typedef BITSET_DECLARE(regmaskstate_t, 2 * MAX_REG);
 
 typedef struct {
+       bool mergedregs;
        regmaskstate_t mask;
 } regmask_t;
 
 static inline bool
 __regmask_get(regmask_t *regmask, struct ir3_register *reg, unsigned n)
 {
-       if (reg->merged) {
+       if (regmask->mergedregs) {
                /* a6xx+ case, with merged register file, we track things in terms
                 * of half-precision registers, with a full precisions register
                 * using two half-precision slots:
@@ -1794,7 +1793,7 @@ __regmask_get(regmask_t *regmask, struct ir3_register *reg, unsigned n)
 static inline void
 __regmask_set(regmask_t *regmask, struct ir3_register *reg, unsigned n)
 {
-       if (reg->merged) {
+       if (regmask->mergedregs) {
                /* a6xx+ case, with merged register file, we track things in terms
                 * of half-precision registers, with a full precisions register
                 * using two half-precision slots:
@@ -1816,9 +1815,10 @@ __regmask_set(regmask_t *regmask, struct ir3_register *reg, unsigned n)
        }
 }
 
-static inline void regmask_init(regmask_t *regmask)
+static inline void regmask_init(regmask_t *regmask, bool mergedregs)
 {
-       memset(regmask, 0, sizeof(*regmask));
+       memset(&regmask->mask, 0, sizeof(regmask->mask));
+       regmask->mergedregs = mergedregs;
 }
 
 static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
@@ -1835,6 +1835,9 @@ static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
 
 static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b)
 {
+       assert(dst->mergedregs == a->mergedregs);
+       assert(dst->mergedregs == b->mergedregs);
+
        for (unsigned i = 0; i < ARRAY_SIZE(dst->mask); i++)
                dst->mask[i] = a->mask[i] | b->mask[i];
 }
index 3dc6a6c7371f009c40d67a9aabf42e0d0d07ebdb..9b5dae7b39ec13f531ccffbac76ed64b8a50ba14 100644 (file)
@@ -88,6 +88,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
        struct ir3_legalize_state *state = &bd->state;
        bool last_input_needs_ss = false;
        bool has_tex_prefetch = false;
+       bool mergedregs = ctx->compiler->gpu_id >= 600;
 
        /* our input state is the OR of all predecessor blocks' state: */
        set_foreach(block->predecessors, entry) {
@@ -132,15 +133,15 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
                if (last_n && is_barrier(last_n)) {
                        n->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
                        last_input_needs_ss = false;
-                       regmask_init(&state->needs_ss_war);
-                       regmask_init(&state->needs_ss);
-                       regmask_init(&state->needs_sy);
+                       regmask_init(&state->needs_ss_war, mergedregs);
+                       regmask_init(&state->needs_ss, mergedregs);
+                       regmask_init(&state->needs_sy, mergedregs);
                }
 
                if (last_n && (last_n->opc == OPC_PREDT)) {
                        n->flags |= IR3_INSTR_SS;
-                       regmask_init(&state->needs_ss_war);
-                       regmask_init(&state->needs_ss);
+                       regmask_init(&state->needs_ss_war, mergedregs);
+                       regmask_init(&state->needs_ss, mergedregs);
                }
 
                /* NOTE: consider dst register too.. it could happen that
@@ -161,13 +162,13 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
                                if (regmask_get(&state->needs_ss, reg)) {
                                        n->flags |= IR3_INSTR_SS;
                                        last_input_needs_ss = false;
-                                       regmask_init(&state->needs_ss_war);
-                                       regmask_init(&state->needs_ss);
+                                       regmask_init(&state->needs_ss_war, mergedregs);
+                                       regmask_init(&state->needs_ss, mergedregs);
                                }
 
                                if (regmask_get(&state->needs_sy, reg)) {
                                        n->flags |= IR3_INSTR_SY;
-                                       regmask_init(&state->needs_sy);
+                                       regmask_init(&state->needs_sy, mergedregs);
                                }
                        }
 
@@ -184,8 +185,8 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
                        if (regmask_get(&state->needs_ss_war, reg)) {
                                n->flags |= IR3_INSTR_SS;
                                last_input_needs_ss = false;
-                               regmask_init(&state->needs_ss_war);
-                               regmask_init(&state->needs_ss);
+                               regmask_init(&state->needs_ss_war, mergedregs);
+                               regmask_init(&state->needs_ss, mergedregs);
                        }
 
                        if (last_rel && (reg->num == regid(REG_A0, 0))) {
@@ -710,6 +711,7 @@ bool
 ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
 {
        struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx);
+       bool mergedregs = ctx->compiler->gpu_id >= 600;
        bool progress;
 
        ctx->so = so;
@@ -719,7 +721,14 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
 
        /* allocate per-block data: */
        foreach_block (block, &ir->block_list) {
-               block->data = rzalloc(ctx, struct ir3_legalize_block_data);
+               struct ir3_legalize_block_data *bd =
+                               rzalloc(ctx, struct ir3_legalize_block_data);
+
+               regmask_init(&bd->state.needs_ss_war, mergedregs);
+               regmask_init(&bd->state.needs_ss, mergedregs);
+               regmask_init(&bd->state.needs_sy, mergedregs);
+
+               block->data = bd;
        }
 
        ir3_remove_nops(ir);