--- /dev/null
+/*
+ * Copyright (C) 2019 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors (Collabora):
+ * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "lcra.h"
+
+/* This module is the reference implementation of "Linearly Constrained
+ * Register Allocation". The paper is available in PDF form
+ * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
+ * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
+ */
+
+struct lcra_state *
+lcra_alloc_equations(
+ unsigned node_count,
+ unsigned min_alignment, unsigned max_alignment,
+ unsigned bound, unsigned class_count)
+{
+ struct lcra_state *l = calloc(1, sizeof(*l));
+
+ l->node_count = node_count;
+ l->class_count = class_count;
+ l->bound = bound;
+
+ l->alignment = calloc(sizeof(l->alignment[0]), node_count);
+ l->linear = calloc(sizeof(l->linear[0]), node_count * node_count);
+ l->modulus = calloc(sizeof(l->modulus[0]), node_count);
+ l->class = calloc(sizeof(l->class[0]), node_count);
+ l->class_start = calloc(sizeof(l->class_start[0]), class_count);
+ l->class_disjoint = calloc(sizeof(l->class_disjoint[0]), class_count * class_count);
+ l->class_size = calloc(sizeof(l->class_size[0]), class_count);
+ l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count);
+ l->solutions = calloc(sizeof(l->solutions[0]), node_count);
+
+ memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count);
+
+ return l;
+}
+
+void
+lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2)
+{
+ l->alignment[node] = align_log2 + 1;
+}
+
+void
+lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2)
+{
+ l->class_disjoint[(c1 * l->class_count) + c2] = true;
+ l->class_disjoint[(c2 * l->class_count) + c1] = true;
+}
+
+void
+lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len)
+{
+ if (l->alignment[node])
+ l->modulus[node] = DIV_ROUND_UP(l->bound - len + 1, 1 << (l->alignment[node] - 1));
+}
+
+void
+lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j)
+{
+ if (i == j)
+ return;
+
+ if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]])
+ return;
+
+ uint32_t constraint_fw = 0;
+ uint32_t constraint_bw = 0;
+
+ for (unsigned D = 0; D < 16; ++D) {
+ if (cmask_i & (cmask_j << D)) {
+ constraint_bw |= (1 << (15 + D));
+ constraint_fw |= (1 << (15 - D));
+ }
+
+ if (cmask_i & (cmask_j >> D)) {
+ constraint_fw |= (1 << (15 + D));
+ constraint_bw |= (1 << (15 - D));
+ }
+ }
+
+ l->linear[j * l->node_count + i] |= constraint_fw;
+ l->linear[i * l->node_count + j] |= constraint_bw;
+}
+
+static bool
+lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i)
+{
+ unsigned *row = &l->linear[i * l->node_count];
+ signed constant = solutions[i];
+
+ for (unsigned j = 0; j < l->node_count; ++j) {
+ if (solutions[j] == ~0) continue;
+
+ signed lhs = solutions[j] - constant;
+
+ if (lhs < -15 || lhs > 15)
+ continue;
+
+ if (row[j] & (1 << (lhs + 15)))
+ return false;
+ }
+
+ return true;
+}
+
+bool
+lcra_solve(struct lcra_state *l)
+{
+ for (unsigned step = 0; step < l->node_count; ++step) {
+ if (l->solutions[step] != ~0) continue;
+ if (l->alignment[step] == 0) continue;
+
+ unsigned _class = l->class[step];
+ unsigned class_start = l->class_start[_class];
+
+ unsigned shift = l->alignment[step] - 1;
+
+ unsigned P = l->bound >> shift;
+ unsigned Q = l->modulus[step];
+ unsigned r_max = l->class_size[_class];
+ unsigned k_max = r_max >> shift;
+ unsigned m_max = k_max / P;
+ bool succ = false;
+
+ for (unsigned m = 0; m < m_max; ++m) {
+ for (unsigned n = 0; n < Q; ++n) {
+ l->solutions[step] = ((m * P + n) << shift) + class_start;
+ succ = lcra_test_linear(l, l->solutions, step);
+
+ if (succ) break;
+ }
+
+ if (succ) break;
+ }
+
+ /* Out of registers - prepare to spill */
+ if (!succ) {
+ l->spill_class = l->class[step];
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/* Register spilling is implemented with a cost-benefit system. Costs are set
+ * by the user. Benefits are calculated from the constraints. */
+
+void
+lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost)
+{
+ l->spill_cost[node] = cost;
+}
+
+/* Count along the lower triangle */
+
+static unsigned
+lcra_count_constraints(struct lcra_state *l, unsigned i)
+{
+ unsigned count = 0;
+ unsigned *constraints = &l->linear[i * l->node_count];
+
+ for (unsigned j = 0; j < i; ++j)
+ count += util_bitcount(constraints[j]);
+
+ return count;
+}
+
+signed
+lcra_get_best_spill_node(struct lcra_state *l)
+{
+ signed best_benefit = INT_MIN;
+ signed best_node = -1;
+
+ for (unsigned i = 0; i < l->node_count; ++i) {
+ /* Find spillable nodes */
+ if (l->class[i] != l->spill_class) continue;
+ if (l->spill_cost[i] < 0) continue;
+
+ /* Compute the benefit of spilling a node as the number of
+ * constraints on the node plus the number of slots it occupies
+ * minus a multiple of the cost. TODO: Come up with a formula
+ * whose use I can justify beyond "it makes my shaderdb look
+ * happy" */
+
+ signed benefit = lcra_count_constraints(l, i);
+ benefit -= l->spill_cost[i] * 2;
+
+ if (benefit > best_benefit) {
+ best_benefit = benefit;
+ best_node = i;
+ }
+ }
+
+ return best_node;
+}
--- /dev/null
+/*
+ * Copyright (C) 2019 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors (Collabora):
+ * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
+ */
+
+#ifndef __LCRA_H
+#define __LCRA_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct lcra_state {
+ unsigned node_count;
+
+ /* Word boundary where vectors can't cross */
+ unsigned bound;
+
+ /* Alignment for node in log2(bytes)+1. Since alignment must be
+ * non-negative power-of-two, the elements are strictly positive
+ * integers. Zero is the sentinel for a missing node */
+ unsigned *alignment;
+
+ /* Linear constraints imposed. Nested array sized upfront, organized as
+ * linear[node_left][node_right]. That is, calculate indices as:
+ *
+ * Each element is itself a bit field denoting whether (c_j - c_i) bias
+ * is present or not, including negative biases.
+ *
+ * Note for Midgard, there are 16 components so the bias is in range
+ * [-15, 15] so encoded by 32-bit field. */
+
+ uint32_t *linear;
+
+ /* Per node max modulus constraints */
+ uint8_t *modulus;
+
+ /* Classes allow nodes to be partitioned with a starting register.
+ * Classes cannot interfere; that is, they are true partitions in the
+ * usual sense of the word. class_count is the number of classes.
+ * class[] is indexed by a node to get the mapped class. class_start is
+ * biased to all solutions in the class. */
+
+ unsigned class_count;
+ unsigned *class;
+ unsigned *class_start;
+ unsigned *class_size;
+ bool *class_disjoint;
+
+ /* Before solving, forced registers; after solving, solutions. */
+ unsigned *solutions;
+
+ /* For register spilling, the costs to spill nodes (as set by the user)
+ * are in spill_cost[], negative if a node is unspillable. Internally,
+ * spill_class specifies which class to spill (whichever class failed
+ * to allocate) */
+
+ signed *spill_cost;
+ unsigned spill_class;
+};
+
+struct lcra_state *
+lcra_alloc_equations(
+ unsigned node_count,
+ unsigned min_alignment, unsigned max_alignment,
+ unsigned bound, unsigned class_count);
+
+
+void
+lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2);
+
+void
+lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2);
+
+void
+lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len);
+
+void
+lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j);
+
+bool
+lcra_solve(struct lcra_state *l);
+
+void
+lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost);
+
+signed
+lcra_get_best_spill_node(struct lcra_state *l);
+
+#endif