From: Alyssa Rosenzweig Date: Mon, 11 May 2020 19:03:58 +0000 (-0400) Subject: pan/lcra: Allow per-variable bounds to be set X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=51582e54541a35b4eddd7dab98d8f676bcc46c53;p=mesa.git pan/lcra: Allow per-variable bounds to be set Different variables need to respect different bounds. In general, 16-bytes is okay, but for 4-channel 16-bit vectors, we can't cross 8 byte boundaries (else the swizzles will not be packable after), so we update LCRA to allow this more general form. Signed-off-by: Alyssa Rosenzweig Part-of: --- diff --git a/src/panfrost/bifrost/bi_ra.c b/src/panfrost/bifrost/bi_ra.c index b305afdf574..1936958c28f 100644 --- a/src/panfrost/bifrost/bi_ra.c +++ b/src/panfrost/bifrost/bi_ra.c @@ -67,7 +67,7 @@ bi_allocate_registers(bi_context *ctx, bool *success) unsigned node_count = bi_max_temp(ctx); struct lcra_state *l = - lcra_alloc_equations(node_count, 16, 1); + lcra_alloc_equations(node_count, 1); l->class_start[BI_REG_CLASS_WORK] = 0; l->class_size[BI_REG_CLASS_WORK] = 64 * 4; /* R0 - R63, all 32-bit */ @@ -79,7 +79,7 @@ bi_allocate_registers(bi_context *ctx, bool *success) continue; l->class[dest] = BI_REG_CLASS_WORK; - lcra_set_alignment(l, dest, 2); /* 2^2 = 4 */ + lcra_set_alignment(l, dest, 2, 16); /* 2^2 = 4 */ lcra_restrict_range(l, dest, 4); } diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index b2844ae9f38..52f6f11f0a0 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -449,7 +449,7 @@ allocate_registers(compiler_context *ctx, bool *spilled) if (!ctx->temp_count) return NULL; - struct lcra_state *l = lcra_alloc_equations(ctx->temp_count, 16, 5); + struct lcra_state *l = lcra_alloc_equations(ctx->temp_count, 5); /* Starts of classes, in bytes */ l->class_start[REG_CLASS_WORK] = 16 * 0; @@ -476,6 +476,7 @@ allocate_registers(compiler_context *ctx, bool *spilled) unsigned *found_class = calloc(sizeof(unsigned), ctx->temp_count); unsigned *min_alignment = calloc(sizeof(unsigned), ctx->temp_count); + unsigned *min_bound = calloc(sizeof(unsigned), ctx->temp_count); mir_foreach_instr_global(ctx, ins) { /* Swizzles of 32-bit sources on 64-bit instructions need to be @@ -535,12 +536,14 @@ allocate_registers(compiler_context *ctx, bool *spilled) } for (unsigned i = 0; i < ctx->temp_count; ++i) { - lcra_set_alignment(l, i, min_alignment[i] ? min_alignment[i] : 2); + lcra_set_alignment(l, i, min_alignment[i] ? min_alignment[i] : 2, + min_bound[i] ? min_bound[i] : 16); lcra_restrict_range(l, i, found_class[i]); } free(found_class); free(min_alignment); + free(min_bound); /* Next, we'll determine semantic class. We default to zero (work). * But, if we're used with a special operation, that will force us to a diff --git a/src/panfrost/util/lcra.c b/src/panfrost/util/lcra.c index 0e30c1d1082..d922972b619 100644 --- a/src/panfrost/util/lcra.c +++ b/src/panfrost/util/lcra.c @@ -41,14 +41,12 @@ struct lcra_state * lcra_alloc_equations( - unsigned node_count, - unsigned bound, unsigned class_count) + unsigned node_count, unsigned class_count) { struct lcra_state *l = calloc(1, sizeof(*l)); l->node_count = node_count; l->class_count = class_count; - l->bound = bound; l->alignment = calloc(sizeof(l->alignment[0]), node_count); l->linear = calloc(sizeof(l->linear[0]), node_count * node_count); @@ -85,9 +83,9 @@ lcra_free(struct lcra_state *l) } void -lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2) +lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, unsigned bound) { - l->alignment[node] = align_log2 + 1; + l->alignment[node] = (align_log2 + 1) | (bound << 16); } void @@ -100,8 +98,12 @@ lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2) void lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len) { - if (node < l->node_count && l->alignment[node]) - l->modulus[node] = DIV_ROUND_UP(l->bound - len + 1, 1 << (l->alignment[node] - 1)); + if (node < l->node_count && l->alignment[node]) { + unsigned BA = l->alignment[node]; + unsigned alignment = (BA & 0xffff) - 1; + unsigned bound = BA >> 16; + l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment); + } } void @@ -163,9 +165,11 @@ lcra_solve(struct lcra_state *l) unsigned _class = l->class[step]; unsigned class_start = l->class_start[_class]; - unsigned shift = l->alignment[step] - 1; + unsigned BA = l->alignment[step]; + unsigned shift = (BA & 0xffff) - 1; + unsigned bound = BA >> 16; - unsigned P = l->bound >> shift; + unsigned P = bound >> shift; unsigned Q = l->modulus[step]; unsigned r_max = l->class_size[_class]; unsigned k_max = r_max >> shift; diff --git a/src/panfrost/util/lcra.h b/src/panfrost/util/lcra.h index 61908953af9..fd47fdc3543 100644 --- a/src/panfrost/util/lcra.h +++ b/src/panfrost/util/lcra.h @@ -33,12 +33,10 @@ struct lcra_state { unsigned node_count; - /* Word boundary where vectors can't cross */ - unsigned bound; - /* Alignment for node in log2(bytes)+1. Since alignment must be * non-negative power-of-two, the elements are strictly positive - * integers. Zero is the sentinel for a missing node */ + * integers. Zero is the sentinel for a missing node. In upper word, + * bound. */ unsigned *alignment; /* Linear constraints imposed. Nested array sized upfront, organized as @@ -81,8 +79,7 @@ struct lcra_state { struct lcra_state * lcra_alloc_equations( - unsigned node_count, - unsigned bound, unsigned class_count); + unsigned node_count, unsigned class_count); void lcra_free(struct lcra_state *l); @@ -91,7 +88,7 @@ void lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2); void -lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2); +lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, unsigned bound); void lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len);