/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

/** @file register_allocate.c
 *
 * Graph-coloring register allocator.
 */

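/*
 * Rough sketch of how a backend might drive this interface.  This is an
 * illustrative outline only; the names NUM_HW_REGS, num_temps and
 * scalar_class are hypothetical, not taken from any particular driver:
 *
 *    struct ra_regs *regs = ra_alloc_reg_set(NUM_HW_REGS);
 *    unsigned int scalar_class = ra_alloc_reg_class(regs);
 *    for (r = 0; r < NUM_HW_REGS; r++)
 *       ra_class_add_reg(regs, scalar_class, r);
 *    ... ra_add_reg_conflict() for any registers that alias each other ...
 *    ra_set_finalize(regs);
 *
 *    struct ra_graph *g = ra_alloc_interference_graph(regs, num_temps);
 *    for (i = 0; i < num_temps; i++)
 *       ra_set_node_class(g, i, scalar_class);
 *    ... ra_add_node_interference() for temps that are live at the same
 *    time ...
 *    if (ra_allocate_no_spills(g)) {
 *       ... ra_get_node_reg(g, i) is the hardware register for temp i ...
 *    } else {
 *       ... set costs with ra_set_node_spill_cost(), spill the node
 *       returned by ra_get_best_spill_node(), rebuild the graph, retry ...
 *    }
 */
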
#include <ralloc.h>

#include "main/imports.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "register_allocate.h"

struct ra_reg {
   GLboolean *conflicts;
   unsigned int *conflict_list;
   unsigned int conflict_list_size;
   unsigned int num_conflicts;
};

struct ra_regs {
   struct ra_reg *regs;
   unsigned int count;

   struct ra_class **classes;
   unsigned int class_count;
};

struct ra_class {
   GLboolean *regs;

   /**
    * p_B in Runeson/Nyström paper.
    *
    * This is "how many regs are in the set."
    */
   unsigned int p;

   /**
    * q_B,C in Runeson/Nyström paper.
    */
   unsigned int *q;
};
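
/*
 * Together, p and q drive the "p,q test": a node whose class is B is
 * trivially colorable if the worst-case number of B registers that its
 * still-present neighbors can occupy (the sum of q(B,C) over the
 * neighbors' classes C) is less than p(B).  ra_set_finalize() computes
 * the q values and pq_test() applies the test.
 */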

struct ra_node {
   GLboolean *adjacency;
   unsigned int *adjacency_list;
   unsigned int class;
   unsigned int adjacency_count;
   unsigned int reg;
   GLboolean in_stack;
   float spill_cost;
};

struct ra_graph {
   struct ra_regs *regs;
   /**
    * The nodes of the graph, one per variable that needs register
    * allocation.
    */
   struct ra_node *nodes;
   unsigned int count; /**< count of nodes. */

   unsigned int *stack;
   unsigned int stack_count;
};

struct ra_regs *
ra_alloc_reg_set(unsigned int count)
{
   unsigned int i;
   struct ra_regs *regs;

   regs = rzalloc(NULL, struct ra_regs);
   regs->count = count;
   regs->regs = rzalloc_array(regs, struct ra_reg, count);

   for (i = 0; i < count; i++) {
      regs->regs[i].conflicts = rzalloc_array(regs->regs, GLboolean, count);
      regs->regs[i].conflicts[i] = GL_TRUE;

      regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4);
      regs->regs[i].conflict_list_size = 4;
      regs->regs[i].conflict_list[0] = i;
      regs->regs[i].num_conflicts = 1;
   }

   return regs;
}

static void
ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2)
{
   struct ra_reg *reg1 = &regs->regs[r1];

   if (reg1->conflict_list_size == reg1->num_conflicts) {
      reg1->conflict_list_size *= 2;
      reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list,
                                     unsigned int, reg1->conflict_list_size);
   }
   reg1->conflict_list[reg1->num_conflicts++] = r2;
   reg1->conflicts[r2] = GL_TRUE;
}

void
ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
{
   if (!regs->regs[r1].conflicts[r2]) {
      ra_add_conflict_list(regs, r1, r2);
      ra_add_conflict_list(regs, r2, r1);
   }
}

unsigned int
ra_alloc_reg_class(struct ra_regs *regs)
{
   struct ra_class *class;

   regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *,
                            regs->class_count + 1);

   class = rzalloc(regs, struct ra_class);
   regs->classes[regs->class_count] = class;

   class->regs = rzalloc_array(class, GLboolean, regs->count);

   return regs->class_count++;
}

void
ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r)
{
   struct ra_class *class = regs->classes[c];

   class->regs[r] = GL_TRUE;
   class->p++;
}

/**
 * Must be called after all conflicts and register classes have been
 * set up and before the register set is used for allocation.
 */
void
ra_set_finalize(struct ra_regs *regs)
{
   unsigned int b, c;

   for (b = 0; b < regs->class_count; b++) {
      regs->classes[b]->q = ralloc_array(regs, unsigned int, regs->class_count);
   }

   /* Compute, for each class B and C, how many regs of B an
    * allocation to C could conflict with.
    */
   for (b = 0; b < regs->class_count; b++) {
      for (c = 0; c < regs->class_count; c++) {
         unsigned int rc;
         int max_conflicts = 0;

         for (rc = 0; rc < regs->count; rc++) {
            int conflicts = 0;
            unsigned int i;

            if (!regs->classes[c]->regs[rc])
               continue;

            for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
               unsigned int rb = regs->regs[rc].conflict_list[i];
               if (regs->classes[b]->regs[rb])
                  conflicts++;
            }
            max_conflicts = MAX2(max_conflicts, conflicts);
         }
         regs->classes[b]->q[c] = max_conflicts;
      }
   }
}
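
/*
 * Worked example with a hypothetical register file (not from any driver):
 * say class A holds four single registers r0..r3 and class B holds the two
 * non-overlapping pairs {r0,r1} and {r2,r3}, with each pair marked as
 * conflicting with the two singles it contains.  Then:
 *
 *    p(A) = 4, p(B) = 2
 *    q(A,A) = 1   (a single conflicts with 1 reg of A: itself)
 *    q(A,B) = 2   (a pair conflicts with 2 singles)
 *    q(B,A) = 1   (a single conflicts with 1 pair)
 *    q(B,B) = 1   (a pair conflicts only with itself)
 */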

static void
ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2)
{
   g->nodes[n1].adjacency[n2] = GL_TRUE;
   g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2;
   g->nodes[n1].adjacency_count++;
}

struct ra_graph *
ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
{
   struct ra_graph *g;
   unsigned int i;

   g = rzalloc(regs, struct ra_graph);
   g->regs = regs;
   g->nodes = rzalloc_array(g, struct ra_node, count);
   g->count = count;

   g->stack = rzalloc_array(g, unsigned int, count);

   for (i = 0; i < count; i++) {
      g->nodes[i].adjacency = rzalloc_array(g, GLboolean, count);
      g->nodes[i].adjacency_list = ralloc_array(g, unsigned int, count);
      g->nodes[i].adjacency_count = 0;
      ra_add_node_adjacency(g, i, i);
      g->nodes[i].reg = ~0;
   }

   return g;
}

void
ra_set_node_class(struct ra_graph *g,
                  unsigned int n, unsigned int class)
{
   g->nodes[n].class = class;
}

void
ra_add_node_interference(struct ra_graph *g,
                         unsigned int n1, unsigned int n2)
{
   if (!g->nodes[n1].adjacency[n2]) {
      ra_add_node_adjacency(g, n1, n2);
      ra_add_node_adjacency(g, n2, n1);
   }
}

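/**
 * Applies the p,q test: returns GL_TRUE if node n is trivially colorable
 * given the neighbors that are still in the graph.
 */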
static GLboolean pq_test(struct ra_graph *g, unsigned int n)
{
   unsigned int j;
   unsigned int q = 0;
   int n_class = g->nodes[n].class;

   for (j = 0; j < g->nodes[n].adjacency_count; j++) {
      unsigned int n2 = g->nodes[n].adjacency_list[j];
      unsigned int n2_class = g->nodes[n2].class;

      if (n != n2 && !g->nodes[n2].in_stack) {
         q += g->regs->classes[n_class]->q[n2_class];
      }
   }

   return q < g->regs->classes[n_class]->p;
}

/**
 * Simplifies the interference graph by pushing all
 * trivially-colorable nodes into a stack of nodes to be colored,
 * removing them from the graph, and rinsing and repeating.
 *
 * Returns GL_TRUE if all nodes were removed from the graph.  GL_FALSE
 * means that either spilling will be required, or optimistic coloring
 * should be applied.
 */
GLboolean
ra_simplify(struct ra_graph *g)
{
   GLboolean progress = GL_TRUE;
   int i;

   while (progress) {
      progress = GL_FALSE;

      for (i = g->count - 1; i >= 0; i--) {
         if (g->nodes[i].in_stack)
            continue;

         if (pq_test(g, i)) {
            g->stack[g->stack_count] = i;
            g->stack_count++;
            g->nodes[i].in_stack = GL_TRUE;
            progress = GL_TRUE;
         }
      }
   }

   for (i = 0; i < g->count; i++) {
      if (!g->nodes[i].in_stack)
         return GL_FALSE;
   }

   return GL_TRUE;
}

/**
 * Pops nodes from the stack back into the graph, coloring them with
 * registers as they go.
 *
 * If all nodes were trivially colorable, then this must succeed.  If
 * not (optimistic coloring), then it may return GL_FALSE.
 */
GLboolean
ra_select(struct ra_graph *g)
{
   int i;

   while (g->stack_count != 0) {
      unsigned int r;
      int n = g->stack[g->stack_count - 1];
      struct ra_class *c = g->regs->classes[g->nodes[n].class];

      /* Find the lowest-numbered reg which is not used by a member
       * of the graph adjacent to us.
       */
      for (r = 0; r < g->regs->count; r++) {
         if (!c->regs[r])
            continue;

         /* Check if any of our neighbors conflict with this register choice. */
         for (i = 0; i < g->nodes[n].adjacency_count; i++) {
            unsigned int n2 = g->nodes[n].adjacency_list[i];

            if (!g->nodes[n2].in_stack &&
                g->regs->regs[r].conflicts[g->nodes[n2].reg]) {
               break;
            }
         }
         if (i == g->nodes[n].adjacency_count)
            break;
      }
      if (r == g->regs->count)
         return GL_FALSE;

      g->nodes[n].reg = r;
      g->nodes[n].in_stack = GL_FALSE;
      g->stack_count--;
   }

   return GL_TRUE;
}

/**
 * Optimistic register coloring: Just push the remaining nodes
 * on the stack.  They'll be colored first in ra_select(), and
 * if they succeed then the locally-colorable nodes are still
 * locally-colorable and the rest of the register allocation
 * will succeed.
 */
void
ra_optimistic_color(struct ra_graph *g)
{
   unsigned int i;

   for (i = 0; i < g->count; i++) {
      if (g->nodes[i].in_stack)
         continue;

      g->stack[g->stack_count] = i;
      g->stack_count++;
      g->nodes[i].in_stack = GL_TRUE;
   }
}

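/**
 * Performs the allocation without spilling: simplify, optimistically
 * color any remaining nodes, then select registers.  Returns GL_FALSE if
 * some node could not be assigned a register, in which case the caller
 * should spill a node (see ra_get_best_spill_node()) and try again.
 */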
GLboolean
ra_allocate_no_spills(struct ra_graph *g)
{
   if (!ra_simplify(g)) {
      ra_optimistic_color(g);
   }
   return ra_select(g);
}

unsigned int
ra_get_node_reg(struct ra_graph *g, unsigned int n)
{
   return g->nodes[n].reg;
}

static float
ra_get_spill_benefit(struct ra_graph *g, unsigned int n)
{
   int j;
   float benefit = 0;
   int n_class = g->nodes[n].class;

   /* Define the benefit of eliminating an interference between n, n2
    * through spilling as q(B, C) / p(B), where B is n's class and C is
    * n2's class.  This is similar to the "count number of edges" approach
    * of traditional graph coloring, but takes classes into account.
    */
   for (j = 0; j < g->nodes[n].adjacency_count; j++) {
      unsigned int n2 = g->nodes[n].adjacency_list[j];
      if (n != n2) {
         unsigned int n2_class = g->nodes[n2].class;
         benefit += ((float)g->regs->classes[n_class]->q[n2_class] /
                     g->regs->classes[n_class]->p);
      }
   }

   return benefit;
}

/**
 * Returns the node with the best (highest) ratio of spill benefit to
 * spill cost, with the benefit measured using the q/p values from the
 * pq test, or -1 if there are no spillable nodes.
 */
int
ra_get_best_spill_node(struct ra_graph *g)
{
   int best_node = -1;
   float best_benefit = 0.0;
   unsigned int n;

   for (n = 0; n < g->count; n++) {
      float cost = g->nodes[n].spill_cost;
      float benefit;

      if (cost <= 0.0)
         continue;

      benefit = ra_get_spill_benefit(g, n);

      if (benefit / cost > best_benefit) {
         best_benefit = benefit / cost;
         best_node = n;
      }
   }

   return best_node;
}

/**
 * Only nodes with a spill cost set (cost != 0.0) will be considered
 * for register spilling.
 */
void
ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost)
{
   g->nodes[n].spill_cost = cost;
}