glsl: Remove the ir_binop_cross opcode.
[mesa.git] / src / mesa / program / register_allocate.c
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 /** @file register_allocate.c
29 *
30 * Graph-coloring register allocator.
31 */
32
33 #include <talloc.h>
34
35 #include "main/imports.h"
36 #include "main/macros.h"
37 #include "main/mtypes.h"
38 #include "register_allocate.h"
39
40 struct ra_reg {
41 char *name;
42 GLboolean *conflicts;
43 };
44
45 struct ra_regs {
46 struct ra_reg *regs;
47 unsigned int count;
48
49 struct ra_class **classes;
50 unsigned int class_count;
51 };
52
53 struct ra_class {
54 GLboolean *regs;
55
56 /**
57 * p_B in Runeson/Nyström paper.
58 *
59 * This is "how many regs are in the set."
60 */
61 unsigned int p;
62
63 /**
64 * q_B,C in Runeson/Nyström paper.
65 */
66 unsigned int *q;
67 };
68
69 struct ra_node {
70 GLboolean *adjacency;
71 unsigned int class;
72 unsigned int adjacency_count;
73 unsigned int reg;
74 GLboolean in_stack;
75 float spill_cost;
76 };
77
78 struct ra_graph {
79 struct ra_regs *regs;
80 /**
81 * the variables that need register allocation.
82 */
83 struct ra_node *nodes;
84 unsigned int count; /**< count of nodes. */
85
86 unsigned int *stack;
87 unsigned int stack_count;
88 };
89
90 struct ra_regs *
91 ra_alloc_reg_set(unsigned int count)
92 {
93 unsigned int i;
94 struct ra_regs *regs;
95
96 regs = talloc_zero(NULL, struct ra_regs);
97 regs->count = count;
98 regs->regs = talloc_zero_array(regs, struct ra_reg, count);
99
100 for (i = 0; i < count; i++) {
101 regs->regs[i].conflicts = talloc_zero_array(regs->regs, GLboolean, count);
102 regs->regs[i].conflicts[i] = GL_TRUE;
103 }
104
105 return regs;
106 }
107
108 void
109 ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
110 {
111 regs->regs[r1].conflicts[r2] = GL_TRUE;
112 regs->regs[r2].conflicts[r1] = GL_TRUE;
113 }
114
115 unsigned int
116 ra_alloc_reg_class(struct ra_regs *regs)
117 {
118 struct ra_class *class;
119
120 regs->classes = talloc_realloc(regs, regs->classes,
121 struct ra_class *,
122 regs->class_count + 1);
123
124 class = talloc_zero(regs, struct ra_class);
125 regs->classes[regs->class_count] = class;
126
127 class->regs = talloc_zero_array(class, GLboolean, regs->count);
128
129 return regs->class_count++;
130 }
131
132 void
133 ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r)
134 {
135 struct ra_class *class = regs->classes[c];
136
137 class->regs[r] = GL_TRUE;
138 class->p++;
139 }
140
141 /**
142 * Must be called after all conflicts and register classes have been
143 * set up and before the register set is used for allocation.
144 */
145 void
146 ra_set_finalize(struct ra_regs *regs)
147 {
148 unsigned int b, c;
149
150 for (b = 0; b < regs->class_count; b++) {
151 regs->classes[b]->q = talloc_array(regs, unsigned int, regs->class_count);
152 }
153
154 /* Compute, for each class B and C, how many regs of B an
155 * allocation to C could conflict with.
156 */
157 for (b = 0; b < regs->class_count; b++) {
158 for (c = 0; c < regs->class_count; c++) {
159 unsigned int rc;
160 int max_conflicts = 0;
161
162 for (rc = 0; rc < regs->count; rc++) {
163 unsigned int rb;
164 int conflicts = 0;
165
166 if (!regs->classes[c]->regs[rc])
167 continue;
168
169 for (rb = 0; rb < regs->count; rb++) {
170 if (regs->classes[b]->regs[rb] &&
171 regs->regs[rb].conflicts[rc])
172 conflicts++;
173 }
174 max_conflicts = MAX2(max_conflicts, conflicts);
175 }
176 regs->classes[b]->q[c] = max_conflicts;
177 }
178 }
179 }
180
181 struct ra_graph *
182 ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
183 {
184 struct ra_graph *g;
185 unsigned int i;
186
187 g = talloc_zero(regs, struct ra_graph);
188 g->regs = regs;
189 g->nodes = talloc_zero_array(g, struct ra_node, count);
190 g->count = count;
191
192 g->stack = talloc_zero_array(g, unsigned int, count);
193
194 for (i = 0; i < count; i++) {
195 g->nodes[i].adjacency = talloc_zero_array(g, GLboolean, count);
196 g->nodes[i].adjacency[i] = GL_TRUE;
197 g->nodes[i].reg = ~0;
198 }
199
200 return g;
201 }
202
203 void
204 ra_set_node_class(struct ra_graph *g,
205 unsigned int n, unsigned int class)
206 {
207 g->nodes[n].class = class;
208 }
209
210 void
211 ra_add_node_interference(struct ra_graph *g,
212 unsigned int n1, unsigned int n2)
213 {
214 if (g->nodes[n1].adjacency[n2])
215 return;
216
217 g->nodes[n1].adjacency[n2] = GL_TRUE;
218 g->nodes[n2].adjacency_count++;
219 g->nodes[n2].adjacency[n1] = GL_TRUE;
220 g->nodes[n2].adjacency_count++;
221 }
222
223 static GLboolean pq_test(struct ra_graph *g, unsigned int n)
224 {
225 unsigned int j;
226 unsigned int q = 0;
227 int n_class = g->nodes[n].class;
228
229 for (j = 0; j < g->count; j++) {
230 if (j == n || g->nodes[j].in_stack)
231 continue;
232
233 if (g->nodes[n].adjacency[j]) {
234 unsigned int j_class = g->nodes[j].class;
235 q += g->regs->classes[n_class]->q[j_class];
236 }
237 }
238
239 return q < g->regs->classes[n_class]->p;
240 }
241
242 /**
243 * Simplifies the interference graph by pushing all
244 * trivially-colorable nodes into a stack of nodes to be colored,
245 * removing them from the graph, and rinsing and repeating.
246 *
247 * Returns GL_TRUE if all nodes were removed from the graph. GL_FALSE
248 * means that either spilling will be required, or optimistic coloring
249 * should be applied.
250 */
251 GLboolean
252 ra_simplify(struct ra_graph *g)
253 {
254 GLboolean progress = GL_TRUE;
255 int i;
256
257 while (progress) {
258 progress = GL_FALSE;
259
260 for (i = g->count - 1; i >= 0; i--) {
261 if (g->nodes[i].in_stack)
262 continue;
263
264 if (pq_test(g, i)) {
265 g->stack[g->stack_count] = i;
266 g->stack_count++;
267 g->nodes[i].in_stack = GL_TRUE;
268 progress = GL_TRUE;
269 }
270 }
271 }
272
273 for (i = 0; i < g->count; i++) {
274 if (!g->nodes[i].in_stack)
275 return GL_FALSE;
276 }
277
278 return GL_TRUE;
279 }
280
281 /**
282 * Pops nodes from the stack back into the graph, coloring them with
283 * registers as they go.
284 *
285 * If all nodes were trivially colorable, then this must succeed. If
286 * not (optimistic coloring), then it may return GL_FALSE;
287 */
288 GLboolean
289 ra_select(struct ra_graph *g)
290 {
291 int i;
292
293 while (g->stack_count != 0) {
294 unsigned int r;
295 int n = g->stack[g->stack_count - 1];
296 struct ra_class *c = g->regs->classes[g->nodes[n].class];
297
298 /* Find the lowest-numbered reg which is not used by a member
299 * of the graph adjacent to us.
300 */
301 for (r = 0; r < g->regs->count; r++) {
302 if (!c->regs[r])
303 continue;
304
305 /* Check if any of our neighbors conflict with this register choice. */
306 for (i = 0; i < g->count; i++) {
307 if (g->nodes[n].adjacency[i] &&
308 !g->nodes[i].in_stack &&
309 g->regs->regs[r].conflicts[g->nodes[i].reg]) {
310 break;
311 }
312 }
313 if (i == g->count)
314 break;
315 }
316 if (r == g->regs->count)
317 return GL_FALSE;
318
319 g->nodes[n].reg = r;
320 g->nodes[n].in_stack = GL_FALSE;
321 g->stack_count--;
322 }
323
324 return GL_TRUE;
325 }
326
327 /**
328 * Optimistic register coloring: Just push the remaining nodes
329 * on the stack. They'll be colored first in ra_select(), and
330 * if they succeed then the locally-colorable nodes are still
331 * locally-colorable and the rest of the register allocation
332 * will succeed.
333 */
334 void
335 ra_optimistic_color(struct ra_graph *g)
336 {
337 unsigned int i;
338
339 for (i = 0; i < g->count; i++) {
340 if (g->nodes[i].in_stack)
341 continue;
342
343 g->stack[g->stack_count] = i;
344 g->stack_count++;
345 g->nodes[i].in_stack = GL_TRUE;
346 }
347 }
348
349 GLboolean
350 ra_allocate_no_spills(struct ra_graph *g)
351 {
352 if (!ra_simplify(g)) {
353 ra_optimistic_color(g);
354 }
355 return ra_select(g);
356 }
357
358 unsigned int
359 ra_get_node_reg(struct ra_graph *g, unsigned int n)
360 {
361 return g->nodes[n].reg;
362 }
363
364 static float
365 ra_get_spill_benefit(struct ra_graph *g, unsigned int n)
366 {
367 int j;
368 float benefit = 0;
369 int n_class = g->nodes[n].class;
370
371 /* Define the benefit of eliminating an interference between n, j
372 * through spilling as q(C, B) / p(C). This is similar to the
373 * "count number of edges" approach of traditional graph coloring,
374 * but takes classes into account.
375 */
376 for (j = 0; j < g->count; j++) {
377 if (j != n && g->nodes[n].adjacency[j]) {
378 unsigned int j_class = g->nodes[j].class;
379 benefit += ((float)g->regs->classes[n_class]->q[j_class] /
380 g->regs->classes[n_class]->p);
381 break;
382 }
383 }
384
385 return benefit;
386 }
387
388 /**
389 * Returns a node number to be spilled according to the cost/benefit using
390 * the pq test, or -1 if there are no spillable nodes.
391 */
392 int
393 ra_get_best_spill_node(struct ra_graph *g)
394 {
395 unsigned int best_node = -1;
396 unsigned int best_benefit = 0.0;
397 unsigned int n;
398
399 for (n = 0; n < g->count; n++) {
400 float cost = g->nodes[n].spill_cost;
401 float benefit;
402
403 if (cost <= 0.0)
404 continue;
405
406 benefit = ra_get_spill_benefit(g, n);
407
408 if (benefit / cost > best_benefit) {
409 best_benefit = benefit / cost;
410 best_node = n;
411 }
412 }
413
414 return best_node;
415 }
416
417 /**
418 * Only nodes with a spill cost set (cost != 0.0) will be considered
419 * for register spilling.
420 */
421 void
422 ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost)
423 {
424 g->nodes[n].spill_cost = cost;
425 }