X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fprogram%2Fregister_allocate.c;h=7faf67215c84440f2bbf43684185b9f8ae8e1a93;hb=2828680e39e843514a38781878e2a3534f876233;hp=e276b8ac84bfd9dc1a1ed0b2474936f7e6661393;hpb=2e177bc8a5585d4b0234b1b680617bfd2ae6ddf8;p=mesa.git diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c index e276b8ac84b..7faf67215c8 100644 --- a/src/mesa/program/register_allocate.c +++ b/src/mesa/program/register_allocate.c @@ -71,8 +71,8 @@ */ #include -#include +#include "util/ralloc.h" #include "main/imports.h" #include "main/macros.h" #include "main/mtypes.h" @@ -82,7 +82,7 @@ #define NO_REG ~0 struct ra_reg { - GLboolean *conflicts; + BITSET_WORD *conflicts; unsigned int *conflict_list; unsigned int conflict_list_size; unsigned int num_conflicts; @@ -99,7 +99,12 @@ struct ra_regs { }; struct ra_class { - GLboolean *regs; + /** + * Bitset indicating which registers belong to this class. + * + * (If bit N is set, then register N belongs to this class.) + */ + BITSET_WORD *regs; /** * p(B) in Runeson/Nyström paper. @@ -139,7 +144,13 @@ struct ra_node { * "remove the edge from the graph" in simplification without * having to actually modify the adjacency_list. */ - GLboolean in_stack; + bool in_stack; + + /** + * The q total, as defined in the Runeson/Nyström paper, for all the + * interfering nodes not in the stack. + */ + unsigned int q_total; /* For an implementation that needs register spilling, this is the * approximate cost of spilling this node. @@ -176,8 +187,9 @@ ra_alloc_reg_set(void *mem_ctx, unsigned int count) regs->regs = rzalloc_array(regs, struct ra_reg, count); for (i = 0; i < count; i++) { - regs->regs[i].conflicts = rzalloc_array(regs->regs, GLboolean, count); - regs->regs[i].conflicts[i] = GL_TRUE; + regs->regs[i].conflicts = rzalloc_array(regs->regs, BITSET_WORD, + BITSET_WORDS(count)); + BITSET_SET(regs->regs[i].conflicts, i); regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4); regs->regs[i].conflict_list_size = 4; @@ -215,13 +227,13 @@ ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2) unsigned int, reg1->conflict_list_size); } reg1->conflict_list[reg1->num_conflicts++] = r2; - reg1->conflicts[r2] = GL_TRUE; + BITSET_SET(reg1->conflicts, r2); } void ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) { - if (!regs->regs[r1].conflicts[r2]) { + if (!BITSET_TEST(regs->regs[r1].conflicts, r2)) { ra_add_conflict_list(regs, r1, r2); ra_add_conflict_list(regs, r2, r1); } @@ -259,7 +271,7 @@ ra_alloc_reg_class(struct ra_regs *regs) class = rzalloc(regs, struct ra_class); regs->classes[regs->class_count] = class; - class->regs = rzalloc_array(class, GLboolean, regs->count); + class->regs = rzalloc_array(class, BITSET_WORD, BITSET_WORDS(regs->count)); return regs->class_count++; } @@ -269,10 +281,19 @@ ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r) { struct ra_class *class = regs->classes[c]; - class->regs[r] = GL_TRUE; + BITSET_SET(class->regs, r); class->p++; } +/** + * Returns true if the register belongs to the given class. + */ +static bool +reg_belongs_to_class(unsigned int r, struct ra_class *c) +{ + return BITSET_TEST(c->regs, r); +} + /** * Must be called after all conflicts and register classes have been * set up and before the register set is used for allocation. @@ -309,12 +330,12 @@ ra_set_finalize(struct ra_regs *regs, unsigned int **q_values) int conflicts = 0; int i; - if (!regs->classes[c]->regs[rc]) + if (!reg_belongs_to_class(rc, regs->classes[c])) continue; for (i = 0; i < regs->regs[rc].num_conflicts; i++) { unsigned int rb = regs->regs[rc].conflict_list[i]; - if (regs->classes[b]->regs[rb]) + if (BITSET_TEST(regs->classes[b]->regs, rb)) conflicts++; } max_conflicts = MAX2(max_conflicts, conflicts); @@ -329,6 +350,12 @@ ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2) { BITSET_SET(g->nodes[n1].adjacency, n2); + if (n1 != n2) { + int n1_class = g->nodes[n1].class; + int n2_class = g->nodes[n2].class; + g->nodes[n1].q_total += g->regs->classes[n1_class]->q[n2_class]; + } + if (g->nodes[n1].adjacency_count >= g->nodes[n1].adjacency_list_size) { g->nodes[n1].adjacency_list_size *= 2; @@ -355,13 +382,14 @@ ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) g->stack = rzalloc_array(g, unsigned int, count); for (i = 0; i < count; i++) { - int bitset_count = ALIGN(count, BITSET_WORDBITS) / BITSET_WORDBITS; + int bitset_count = BITSET_WORDS(count); g->nodes[i].adjacency = rzalloc_array(g, BITSET_WORD, bitset_count); g->nodes[i].adjacency_list_size = 4; g->nodes[i].adjacency_list = ralloc_array(g, unsigned int, g->nodes[i].adjacency_list_size); g->nodes[i].adjacency_count = 0; + g->nodes[i].q_total = 0; ra_add_node_adjacency(g, i, i); g->nodes[i].reg = NO_REG; @@ -387,22 +415,29 @@ ra_add_node_interference(struct ra_graph *g, } } -static GLboolean pq_test(struct ra_graph *g, unsigned int n) +static bool +pq_test(struct ra_graph *g, unsigned int n) { - unsigned int j; - unsigned int q = 0; int n_class = g->nodes[n].class; - for (j = 0; j < g->nodes[n].adjacency_count; j++) { - unsigned int n2 = g->nodes[n].adjacency_list[j]; + return g->nodes[n].q_total < g->regs->classes[n_class]->p; +} + +static void +decrement_q(struct ra_graph *g, unsigned int n) +{ + unsigned int i; + int n_class = g->nodes[n].class; + + for (i = 0; i < g->nodes[n].adjacency_count; i++) { + unsigned int n2 = g->nodes[n].adjacency_list[i]; unsigned int n2_class = g->nodes[n2].class; if (n != n2 && !g->nodes[n2].in_stack) { - q += g->regs->classes[n_class]->q[n2_class]; + assert(g->nodes[n2].q_total >= g->regs->classes[n2_class]->q[n_class]); + g->nodes[n2].q_total -= g->regs->classes[n2_class]->q[n_class]; } } - - return q < g->regs->classes[n_class]->p; } /** @@ -410,38 +445,50 @@ static GLboolean pq_test(struct ra_graph *g, unsigned int n) * trivially-colorable nodes into a stack of nodes to be colored, * removing them from the graph, and rinsing and repeating. * - * Returns GL_TRUE if all nodes were removed from the graph. GL_FALSE - * means that either spilling will be required, or optimistic coloring - * should be applied. + * If we encounter a case where we can't push any nodes on the stack, then + * we optimistically choose a node and push it on the stack. We heuristically + * push the node with the lowest total q value, since it has the fewest + * neighbors and therefore is most likely to be allocated. */ -GLboolean +static void ra_simplify(struct ra_graph *g) { - GLboolean progress = GL_TRUE; + bool progress = true; int i; while (progress) { - progress = GL_FALSE; + unsigned int best_optimistic_node = ~0; + unsigned int lowest_q_total = ~0; + + progress = false; for (i = g->count - 1; i >= 0; i--) { if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG) continue; if (pq_test(g, i)) { + decrement_q(g, i); g->stack[g->stack_count] = i; g->stack_count++; - g->nodes[i].in_stack = GL_TRUE; - progress = GL_TRUE; + g->nodes[i].in_stack = true; + progress = true; + } else { + unsigned int new_q_total = g->nodes[i].q_total; + if (new_q_total < lowest_q_total) { + best_optimistic_node = i; + lowest_q_total = new_q_total; + } } } - } - for (i = 0; i < g->count; i++) { - if (!g->nodes[i].in_stack) - return GL_FALSE; + if (!progress && best_optimistic_node != ~0) { + decrement_q(g, best_optimistic_node); + g->stack[g->stack_count] = best_optimistic_node; + g->stack_count++; + g->nodes[best_optimistic_node].in_stack = true; + progress = true; + } } - - return GL_TRUE; } /** @@ -449,9 +496,9 @@ ra_simplify(struct ra_graph *g) * registers as they go. * * If all nodes were trivially colorable, then this must succeed. If - * not (optimistic coloring), then it may return GL_FALSE; + * not (optimistic coloring), then it may return false; */ -GLboolean +static bool ra_select(struct ra_graph *g) { int i; @@ -468,7 +515,7 @@ ra_select(struct ra_graph *g) */ for (ri = 0; ri < g->regs->count; ri++) { r = (start_search_reg + ri) % g->regs->count; - if (!c->regs[r]) + if (!reg_belongs_to_class(r, c)) continue; /* Check if any of our neighbors conflict with this register choice. */ @@ -476,55 +523,36 @@ ra_select(struct ra_graph *g) unsigned int n2 = g->nodes[n].adjacency_list[i]; if (!g->nodes[n2].in_stack && - g->regs->regs[r].conflicts[g->nodes[n2].reg]) { + BITSET_TEST(g->regs->regs[r].conflicts, g->nodes[n2].reg)) { break; } } if (i == g->nodes[n].adjacency_count) break; } + + /* set this to false even if we return here so that + * ra_get_best_spill_node() considers this node later. + */ + g->nodes[n].in_stack = false; + if (ri == g->regs->count) - return GL_FALSE; + return false; g->nodes[n].reg = r; - g->nodes[n].in_stack = GL_FALSE; g->stack_count--; if (g->regs->round_robin) start_search_reg = r + 1; } - return GL_TRUE; + return true; } -/** - * Optimistic register coloring: Just push the remaining nodes - * on the stack. They'll be colored first in ra_select(), and - * if they succeed then the locally-colorable nodes are still - * locally-colorable and the rest of the register allocation - * will succeed. - */ -void -ra_optimistic_color(struct ra_graph *g) +bool +ra_allocate(struct ra_graph *g) { - unsigned int i; - - for (i = 0; i < g->count; i++) { - if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG) - continue; - - g->stack[g->stack_count] = i; - g->stack_count++; - g->nodes[i].in_stack = GL_TRUE; - } -} - -GLboolean -ra_allocate_no_spills(struct ra_graph *g) -{ - if (!ra_simplify(g)) { - ra_optimistic_color(g); - } + ra_simplify(g); return ra_select(g); } @@ -551,7 +579,7 @@ void ra_set_node_reg(struct ra_graph *g, unsigned int n, unsigned int reg) { g->nodes[n].reg = reg; - g->nodes[n].in_stack = GL_FALSE; + g->nodes[n].in_stack = false; } static float @@ -589,6 +617,11 @@ ra_get_best_spill_node(struct ra_graph *g) float best_benefit = 0.0; unsigned int n; + /* Consider any nodes that we colored successfully or the node we failed to + * color for spilling. When we failed to color a node in ra_select(), we + * only considered these nodes, so spilling any other ones would not result + * in us making progress. + */ for (n = 0; n < g->count; n++) { float cost = g->nodes[n].spill_cost; float benefit; @@ -596,10 +629,6 @@ ra_get_best_spill_node(struct ra_graph *g) if (cost <= 0.0) continue; - /* Only consider registers for spilling if they are still in the - * interference graph (those on the stack have already been proven to be - * allocatable without spilling). - */ if (g->nodes[n].in_stack) continue;