From: Alyssa Rosenzweig Date: Sat, 19 Oct 2019 23:43:47 +0000 (-0400) Subject: pan/midgard: Implement linearly-constrained register allocation X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=66ad64d73d244501f5c70c742564cb5ab4fb56da;p=mesa.git pan/midgard: Implement linearly-constrained register allocation Signed-off-by: Alyssa Rosenzweig --- diff --git a/src/panfrost/midgard/lcra.c b/src/panfrost/midgard/lcra.c new file mode 100644 index 00000000000..d86c35ffe09 --- /dev/null +++ b/src/panfrost/midgard/lcra.c @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2019 Collabora, Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors (Collabora): + * Alyssa Rosenzweig + */ + +#include +#include +#include +#include +#include +#include "util/macros.h" +#include "util/u_math.h" +#include "lcra.h" + +/* This module is the reference implementation of "Linearly Constrained + * Register Allocation". 
The paper is available in PDF form + * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX + * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md) + */ + +/* Allocate a solver state for node_count nodes and class_count classes. + * Every per-node and per-class array is zero-filled by calloc, except + * solutions[], whose bytes are then set to 0xFF so that each entry reads as + * ~0, the "not yet solved" value tested by lcra_solve and lcra_test_linear. + * min_alignment and max_alignment are accepted but not read here. + * NOTE(review): no calloc result is checked, so an allocation failure is + * dereferenced immediately, and no matching free routine is visible in this + * patch. */ +struct lcra_state * +lcra_alloc_equations( + unsigned node_count, + unsigned min_alignment, unsigned max_alignment, + unsigned bound, unsigned class_count) +{ + struct lcra_state *l = calloc(1, sizeof(*l)); + + l->node_count = node_count; + l->class_count = class_count; + l->bound = bound; + + l->alignment = calloc(sizeof(l->alignment[0]), node_count); + l->linear = calloc(sizeof(l->linear[0]), node_count * node_count); + l->modulus = calloc(sizeof(l->modulus[0]), node_count); + l->class = calloc(sizeof(l->class[0]), node_count); + l->class_start = calloc(sizeof(l->class_start[0]), class_count); + l->class_disjoint = calloc(sizeof(l->class_disjoint[0]), class_count * class_count); + l->class_size = calloc(sizeof(l->class_size[0]), class_count); + l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count); + l->solutions = calloc(sizeof(l->solutions[0]), node_count); + + memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count); + + return l; +} + +/* Store the alignment of node as align_log2+1, which keeps 0 free to mean + * that no alignment has been set; lcra_solve skips nodes whose entry is 0. */ +void +lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2) +{ + l->alignment[node] = align_log2 + 1; +} + +/* Mark classes c1 and c2 as disjoint. Both (c1, c2) and (c2, c1) are set, so + * the class_count x class_count table stays symmetric. + * lcra_add_node_interference records nothing for nodes whose classes are + * marked this way. */ +void +lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2) +{ + l->class_disjoint[(c1 * l->class_count) + c2] = true; + l->class_disjoint[(c2 * l->class_count) + c1] = true; +} + +/* Set modulus[node] to DIV_ROUND_UP(bound-len+1, 1 << align_log2), where + * align_log2 is the value given earlier to lcra_set_alignment. lcra_solve + * uses the modulus as the number of aligned offsets it tries inside each + * bound-sized window. Does nothing if the node has no alignment yet, so the + * alignment has to be set first. NOTE(review): bound-len+1 is unsigned and + * wraps around if len is greater than bound+1. */ +void +lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len) +{ + if (l->alignment[node]) + l->modulus[node] = DIV_ROUND_UP(l->bound - len + 1, 1 << (l->alignment[node] - 1)); +} + +/* Record that components cmask_i of node i must not overlap components + * cmask_j of node j. For every offset D in [0, 15] at which one mask, + * shifted by D, intersects the other, the matching bits 15+D and 15-D are + * OR'd into linear[i][j] and linear[j][i]. lcra_test_linear reads bit (d+15) + * of linear[a][b] as "solutions[b] minus solutions[a] equal to d is + * forbidden". Nothing is recorded when i == j or when the classes of the two + * nodes are marked disjoint. */ +void +lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j) +{ + if (i == j) + return; + + if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]]) + return; + + uint32_t constraint_fw = 0; + uint32_t constraint_bw = 0; + + for (unsigned D = 0; 
D < 16; ++D) { + if (cmask_i & (cmask_j << D)) { + constraint_bw |= (1 << (15 + D)); + constraint_fw |= (1 << (15 - D)); + } + + if (cmask_i & (cmask_j >> D)) { + constraint_fw |= (1 << (15 + D)); + constraint_bw |= (1 << (15 - D)); + } + } + + l->linear[j * l->node_count + i] |= constraint_fw; + l->linear[i * l->node_count + j] |= constraint_bw; +} + +/* Check the value currently held in solutions[i] against row i of linear[]. + * Returns false as soon as some node j with a solution (anything other than + * ~0) sits at a difference solutions[j] minus solutions[i] within [-15, 15] + * whose bit (difference+15) is set in linear[i][j]; returns true otherwise. + * Node i itself gives a difference of 0, but its diagonal entry is never set + * because lcra_add_node_interference returns early for i == j. */ +static bool +lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i) +{ + unsigned *row = &l->linear[i * l->node_count]; + signed constant = solutions[i]; + + for (unsigned j = 0; j < l->node_count; ++j) { + if (solutions[j] == ~0) continue; + + signed lhs = solutions[j] - constant; + + if (lhs < -15 || lhs > 15) + continue; + + if (row[j] & (1 << (lhs + 15))) + return false; + } + + return true; +} + +/* Assign a solution to each node in index order, taking the first candidate + * that fits. Nodes whose solutions[] entry is not ~0, or whose alignment is + * unset, are skipped. With shift = alignment-1 and P = bound >> shift, the + * candidates for a node are class_start plus ((m*P+n) << shift) for + * n < modulus[node] and m < (class_size >> shift) / P, with n varying + * fastest. Returns true when every node was placed. Otherwise the class of + * the failing node is stored in spill_class and false is returned; the + * solutions[] entry of the failing node is not reset to ~0. + * NOTE(review): P is used as a divisor, so bound >> shift must be non-zero. */ +bool +lcra_solve(struct lcra_state *l) +{ + for (unsigned step = 0; step < l->node_count; ++step) { + if (l->solutions[step] != ~0) continue; + if (l->alignment[step] == 0) continue; + + unsigned _class = l->class[step]; + unsigned class_start = l->class_start[_class]; + + unsigned shift = l->alignment[step] - 1; + + unsigned P = l->bound >> shift; + unsigned Q = l->modulus[step]; + unsigned r_max = l->class_size[_class]; + unsigned k_max = r_max >> shift; + unsigned m_max = k_max / P; + bool succ = false; + + for (unsigned m = 0; m < m_max; ++m) { + for (unsigned n = 0; n < Q; ++n) { + l->solutions[step] = ((m * P + n) << shift) + class_start; + succ = lcra_test_linear(l, l->solutions, step); + + if (succ) break; + } + + if (succ) break; + } + + /* Out of registers - prepare to spill */ + if (!succ) { + l->spill_class = l->class[step]; + return false; + } + } + + return true; +} + +/* Register spilling is implemented with a cost-benefit system. Costs are set + * by the user. Benefits are calculated from the constraints. 
*/ + +/* Set the cost of spilling node. lcra_get_best_spill_node never picks a + * node whose cost is negative. */ +void +lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost) +{ + l->spill_cost[node] = cost; +} + +/* Count along the lower triangle: the sum of util_bitcount(linear[i][j]) + * over all j < i. Entries with j >= i are not counted. */ + +static unsigned +lcra_count_constraints(struct lcra_state *l, unsigned i) +{ + unsigned count = 0; + unsigned *constraints = &l->linear[i * l->node_count]; + + for (unsigned j = 0; j < i; ++j) + count += util_bitcount(constraints[j]); + + return count; +} + +/* Pick the node to spill. Only nodes whose class equals spill_class (set by + * lcra_solve when it fails) and whose spill cost is non-negative are + * considered. The node with the greatest benefit wins, and on a tie the + * lowest index is kept. Returns the node index, or -1 if no node + * qualifies. */ +signed +lcra_get_best_spill_node(struct lcra_state *l) +{ + signed best_benefit = INT_MIN; + signed best_node = -1; + + for (unsigned i = 0; i < l->node_count; ++i) { + /* Find spillable nodes */ + if (l->class[i] != l->spill_class) continue; + if (l->spill_cost[i] < 0) continue; + + /* Compute the benefit of spilling a node as the number of + * constraints on the node (lower triangle only, as counted by + * lcra_count_constraints) minus twice the cost. NOTE(review): + * this comment used to mention adding the number of slots the + * node occupies, but no such term is computed here. TODO: Come + * up with a formula whose use I can justify beyond "it makes my + * shaderdb look happy" */ + + signed benefit = lcra_count_constraints(l, i); + benefit -= l->spill_cost[i] * 2; + + if (benefit > best_benefit) { + best_benefit = benefit; + best_node = i; + } + } + + return best_node; +} diff --git a/src/panfrost/midgard/lcra.h b/src/panfrost/midgard/lcra.h new file mode 100644 index 00000000000..a9bdd57026c --- /dev/null +++ b/src/panfrost/midgard/lcra.h @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2019 Collabora, Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors (Collabora): + * Alyssa Rosenzweig + */ + +#ifndef __LCRA_H +#define __LCRA_H + +#include +#include + +/* State of one register allocation problem: created by lcra_alloc_equations, + * filled in through the lcra_* setters and solved by lcra_solve. */ +struct lcra_state { + /* Number of nodes; every per-node array has this many entries */ + unsigned node_count; + + /* Word boundary where vectors can't cross */ + unsigned bound; + + /* Alignment for node in log2(bytes)+1. Since alignment must be + * non-negative power-of-two, the elements are strictly positive + * integers. Zero is the sentinel for a missing node */ + unsigned *alignment; + + /* Linear constraints imposed. Nested array sized upfront, organized as + * linear[node_left][node_right]. That is, calculate indices as: + * + * linear[(node_left * node_count) + node_right] + * + * Each element is itself a bit field denoting whether (c_j - c_i) bias + * is present or not, including negative biases. + * + * Note for Midgard, there are 16 components so the bias is in range + * [-15, 15] so encoded by 32-bit field. A bias b is stored as bit + * (b+15); a set bit means the solver must avoid that bias. 
*/ + + uint32_t *linear; + + /* Per node max modulus constraints, as computed by + * lcra_restrict_range. Each entry is only 8 bits wide. */ + uint8_t *modulus; + + /* Classes allow nodes to be partitioned with a starting register. + * Classes cannot interfere; that is, they are true partitions in the + * usual sense of the word. class_count is the number of classes. + * class[] is indexed by a node to get the mapped class. class_start is + * biased to all solutions in the class. class_size[] is indexed by a + * class and bounds the offsets tried within it. class_disjoint is a + * class_count x class_count table, indexed as (c1 * class_count) + c2, + * written by lcra_set_disjoint_class. */ + + unsigned class_count; + unsigned *class; + unsigned *class_start; + unsigned *class_size; + bool *class_disjoint; + + /* Before solving, forced registers; after solving, solutions. An + * entry of ~0 means the node has no solution yet. */ + unsigned *solutions; + + /* For register spilling, the costs to spill nodes (as set by the user) + * are in spill_cost[], negative if a node is unspillable. Internally, + * spill_class specifies which class to spill (whichever class failed + * to allocate) */ + + signed *spill_cost; + unsigned spill_class; +}; + +/* Allocate a zeroed state with every solution initialised to ~0 */ +struct lcra_state * +lcra_alloc_equations( + unsigned node_count, + unsigned min_alignment, unsigned max_alignment, + unsigned bound, unsigned class_count); + + +/* Mark classes c1 and c2 as mutually disjoint */ +void +lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2); + +/* Set the alignment of node, given as a log2 */ +void +lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2); + +/* Derive modulus[node] from bound, len and the alignment of the node, which + * must already be set */ +void +lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len); + +/* Forbid overlap between components cmask_i of node i and components cmask_j + * of node j */ +void +lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j); + +/* Solve for all nodes; returns false and sets spill_class if some node cannot + * be placed */ +bool +lcra_solve(struct lcra_state *l); + +/* Set the spill cost of node; a negative cost marks it unspillable */ +void +lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost); + +/* Returns the node to spill within spill_class, or -1 if none is spillable */ +signed +lcra_get_best_spill_node(struct lcra_state *l); + +#endif diff --git a/src/panfrost/midgard/meson.build b/src/panfrost/midgard/meson.build index 0d8722d139a..e6fa2019bb0 100644 --- a/src/panfrost/midgard/meson.build +++ b/src/panfrost/midgard/meson.build @@ -37,6 +37,7 @@ libpanfrost_midgard_files = files( 'midgard_opt_float.c', 'midgard_opt_perspective.c', 'disassemble.c', + 'lcra.c' ) 
midgard_nir_algebraic_c = custom_target(