From c822460f851ae6f3c74a01b9eec9ea924a0de12d Mon Sep 17 00:00:00 2001 From: Hyunjun Ko Date: Mon, 18 Nov 2019 09:54:09 +0000 Subject: [PATCH] freedreno/ir3: handle half registers for arrays during register allocation. So far we have only handled full regs of arrays during pre-allocation. This patch also handles half regs of arrays and takes the size of half regs into account when checking for conflicts. Part-of: --- src/freedreno/ir3/ir3.h | 3 +++ src/freedreno/ir3/ir3_context.c | 1 + src/freedreno/ir3/ir3_ra.c | 43 ++++++++++++++++++++++++++------- 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index fc82932ba22..f927d1ff204 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -500,6 +500,9 @@ struct ir3_array { unsigned base; /* base vreg name */ unsigned reg; /* base physical reg */ uint16_t start_ip, end_ip; + + /* Indicates if half-precision */ + bool half; }; struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id); diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index b44dc4a67f3..b8ec4a7da44 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -525,6 +525,7 @@ ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n, mov->cat1.src_type = TYPE_U16; mov->cat1.dst_type = TYPE_U16; flags |= IR3_REG_HALF; + arr->half = true; } else { mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; diff --git a/src/freedreno/ir3/ir3_ra.c b/src/freedreno/ir3/ir3_ra.c index f04db4f29ac..6d32c0d08e0 100644 --- a/src/freedreno/ir3/ir3_ra.c +++ b/src/freedreno/ir3/ir3_ra.c @@ -366,6 +366,15 @@ is_high(struct ir3_instruction *instr) return !!(instr->regs[0]->flags & IR3_REG_HIGH); } +static unsigned +reg_size_for_array(struct ir3_array *arr) +{ + if (arr->half) + return DIV_ROUND_UP(arr->length, 2); + + return arr->length; +} + static int size_to_class(unsigned sz, bool half, bool high) { @@ -665,8 +674,8 @@ 
ra_init(struct ir3_ra_ctx *ctx) base = ctx->class_base[total_class_count]; foreach_array (arr, &ctx->ir->array_list) { arr->base = base; - ctx->class_alloc_count[total_class_count] += arr->length; - base += arr->length; + ctx->class_alloc_count[total_class_count] += reg_size_for_array(arr); + base += reg_size_for_array(arr); } ctx->alloc_count += ctx->class_alloc_count[total_class_count]; @@ -791,7 +800,10 @@ ra_block_compute_live_ranges(struct ir3_ra_ctx *ctx, struct ir3_block *block) */ for (i = 0; i < arr->length; i++) { unsigned name = arr->base + i; - ra_set_node_class(ctx->g, name, ctx->set->classes[0]); + if(arr->half) + ra_set_node_class(ctx->g, name, ctx->set->half_classes[0]); + else + ra_set_node_class(ctx->g, name, ctx->set->classes[0]); } /* indirect write is treated like a write to all array @@ -1333,9 +1345,9 @@ retry: /* if it intersects with liverange AND register range.. */ if (intersects(arr->start_ip, arr->end_ip, arr2->start_ip, arr2->end_ip) && - intersects(base, base + arr->length, - arr2->reg, arr2->reg + arr2->length)) { - base = MAX2(base, arr2->reg + arr2->length); + intersects(base, base + reg_size_for_array(arr), + arr2->reg, arr2->reg + reg_size_for_array(arr2))) { + base = MAX2(base, arr2->reg + reg_size_for_array(arr2)); goto retry; } } @@ -1361,7 +1373,7 @@ retry: */ if (intersects(arr->start_ip, arr->end_ip, ctx->def[name], ctx->use[name]) && - intersects(base, base + arr->length, + intersects(base, base + reg_size_for_array(arr), regid, regid + class_sizes[id->cls])) { base = MAX2(base, regid + class_sizes[id->cls]); goto retry; @@ -1373,9 +1385,22 @@ retry: for (unsigned i = 0; i < arr->length; i++) { unsigned name, reg; - name = arr->base + i; - reg = ctx->set->gpr_to_ra_reg[0][base++]; + if (arr->half) { + /* This is not needed on generations older than a6xx, + * since full regs and half regs do not conflict + * on them. 
+ * + * TODO Presumably "base" could start from 0 independently + * for half regs of arrays on older generations. + */ + unsigned base_half = base * 2 + i; + reg = ctx->set->gpr_to_ra_reg[0+HALF_OFFSET][base_half]; + base = base_half / 2 + 1; + } else { + reg = ctx->set->gpr_to_ra_reg[0][base++]; + } + name = arr->base + i; ra_set_node_reg(ctx->g, name, reg); } } -- 2.30.2