Merge branch 'llvm-cliptest-viewport'
[mesa.git] / src / mesa / program / register_allocate.c
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 /** @file register_allocate.c
29 *
30 * Graph-coloring register allocator.
31 */
32
33 #include <talloc.h>
34
35 #include "main/imports.h"
36 #include "main/macros.h"
37 #include "main/mtypes.h"
38 #include "register_allocate.h"
39
/** A single physical register in the register file being allocated. */
struct ra_reg {
   char *name;           /**< never written in this file — presumably a debug label; TODO confirm against callers */
   GLboolean *conflicts; /**< conflicts[r2] != 0 means this reg conflicts with reg r2; symmetric, and a reg always conflicts with itself */
};
44
/** The set of physical registers and register classes available for allocation. */
struct ra_regs {
   struct ra_reg *regs;       /**< array of all physical registers, length \c count */
   unsigned int count;        /**< number of physical registers */

   struct ra_class **classes; /**< array of register classes, length \c class_count */
   unsigned int class_count;  /**< number of register classes allocated so far */
};
52
/** A class of interchangeable registers (e.g. "any GRF", "any address reg"). */
struct ra_class {
   GLboolean *regs; /**< regs[r] != 0 means physical reg r belongs to this class */

   /**
    * p_B in Runeson/Nyström paper.
    *
    * This is "how many regs are in the set."
    */
   unsigned int p;

   /**
    * q_B,C in Runeson/Nyström paper.
    *
    * q[c] is the maximum number of this class's registers that a single
    * allocation from class c could conflict with.  Computed in
    * ra_set_finalize() and consumed by the trivially-colorable test.
    */
   unsigned int *q;
};
68
/** One variable (virtual register) in the interference graph. */
struct ra_node {
   GLboolean *adjacency;         /**< adjacency[j] != 0 means this node interferes with node j; a node is marked adjacent to itself */
   unsigned int class;           /**< index into ra_regs::classes for this node's register class */
   unsigned int adjacency_count; /**< bookkeeping count of interference edges added */
   unsigned int reg;             /**< assigned physical register, or ~0 before ra_select() colors the node */
   GLboolean in_stack;           /**< node has been removed from the graph onto the simplify stack */
};
76
/** An interference graph to be colored against a finalized ra_regs set. */
struct ra_graph {
   struct ra_regs *regs; /**< the register set being allocated from */
   /**
    * the variables that need register allocation.
    */
   struct ra_node *nodes;
   unsigned int count;   /**< count of nodes. */

   unsigned int *stack;  /**< nodes pushed during simplify, popped (colored) in ra_select() */
   unsigned int stack_count;
};
88
89 struct ra_regs *
90 ra_alloc_reg_set(unsigned int count)
91 {
92 unsigned int i;
93 struct ra_regs *regs;
94
95 regs = talloc_zero(NULL, struct ra_regs);
96 regs->count = count;
97 regs->regs = talloc_zero_array(regs, struct ra_reg, count);
98
99 for (i = 0; i < count; i++) {
100 regs->regs[i].conflicts = talloc_zero_array(regs->regs, GLboolean, count);
101 regs->regs[i].conflicts[i] = GL_TRUE;
102 }
103
104 return regs;
105 }
106
107 void
108 ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
109 {
110 regs->regs[r1].conflicts[r2] = GL_TRUE;
111 regs->regs[r2].conflicts[r1] = GL_TRUE;
112 }
113
114 unsigned int
115 ra_alloc_reg_class(struct ra_regs *regs)
116 {
117 struct ra_class *class;
118
119 regs->classes = talloc_realloc(regs, regs->classes,
120 struct ra_class *,
121 regs->class_count + 1);
122
123 class = talloc_zero(regs, struct ra_class);
124 regs->classes[regs->class_count] = class;
125
126 class->regs = talloc_zero_array(class, GLboolean, regs->count);
127
128 return regs->class_count++;
129 }
130
131 void
132 ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r)
133 {
134 struct ra_class *class = regs->classes[c];
135
136 class->regs[r] = GL_TRUE;
137 class->p++;
138 }
139
/**
 * Must be called after all conflicts and register classes have been
 * set up and before the register set is used for allocation.
 *
 * Computes the q_B,C table from the Runeson/Nyström paper: for every
 * pair of classes (B, C), the worst case over all registers rc in C of
 * how many of B's registers conflict with rc.  pq_test() sums these
 * values over a node's neighbors to decide trivial colorability.
 */
void
ra_set_finalize(struct ra_regs *regs)
{
   unsigned int b, c;

   for (b = 0; b < regs->class_count; b++) {
      regs->classes[b]->q = talloc_array(regs, unsigned int, regs->class_count);
   }

   /* Compute, for each class B and C, how many regs of B an
    * allocation to C could conflict with.
    */
   for (b = 0; b < regs->class_count; b++) {
      for (c = 0; c < regs->class_count; c++) {
	 unsigned int rc;
	 int max_conflicts = 0;

	 /* For each register rc in class C... */
	 for (rc = 0; rc < regs->count; rc++) {
	    unsigned int rb;
	    int conflicts = 0;

	    if (!regs->classes[c]->regs[rc])
	       continue;

	    /* ...count how many of class B's registers conflict with rc. */
	    for (rb = 0; rb < regs->count; rb++) {
	       if (regs->classes[b]->regs[rb] &&
		   regs->regs[rb].conflicts[rc])
		  conflicts++;
	    }
	    max_conflicts = MAX2(max_conflicts, conflicts);
	 }
	 regs->classes[b]->q[c] = max_conflicts;
      }
   }
}
179
180 struct ra_graph *
181 ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
182 {
183 struct ra_graph *g;
184 unsigned int i;
185
186 g = talloc_zero(regs, struct ra_graph);
187 g->regs = regs;
188 g->nodes = talloc_zero_array(g, struct ra_node, count);
189 g->count = count;
190
191 g->stack = talloc_zero_array(g, unsigned int, count);
192
193 for (i = 0; i < count; i++) {
194 g->nodes[i].adjacency = talloc_zero_array(g, GLboolean, count);
195 g->nodes[i].adjacency[i] = GL_TRUE;
196 g->nodes[i].reg = ~0;
197 }
198
199 return g;
200 }
201
/** Sets which register class node \p n must be allocated from. */
void
ra_set_node_class(struct ra_graph *g,
		  unsigned int n, unsigned int class)
{
   g->nodes[n].class = class;
}
208
209 void
210 ra_add_node_interference(struct ra_graph *g,
211 unsigned int n1, unsigned int n2)
212 {
213 if (g->nodes[n1].adjacency[n2])
214 return;
215
216 g->nodes[n1].adjacency[n2] = GL_TRUE;
217 g->nodes[n2].adjacency_count++;
218 g->nodes[n2].adjacency[n1] = GL_TRUE;
219 g->nodes[n2].adjacency_count++;
220 }
221
/**
 * Returns GL_TRUE if node \p n is trivially colorable: the p,q test
 * from the Runeson/Nyström paper.
 *
 * Sums, over every neighbor of n still in the graph, the worst-case
 * number of n's-class registers that neighbor's allocation could block
 * (q_B,C).  If that total is less than the class size (p_B), at least
 * one register must remain free for n no matter how the neighbors are
 * colored.
 */
static GLboolean pq_test(struct ra_graph *g, unsigned int n)
{
   unsigned int j;
   unsigned int q = 0;
   int n_class = g->nodes[n].class;

   for (j = 0; j < g->count; j++) {
      /* Skip self (adjacency[n] is GL_TRUE) and removed nodes. */
      if (j == n || g->nodes[j].in_stack)
	 continue;

      if (g->nodes[n].adjacency[j]) {
	 unsigned int j_class = g->nodes[j].class;
	 q += g->regs->classes[n_class]->q[j_class];
      }
   }

   return q < g->regs->classes[n_class]->p;
}
240
241 /**
242 * Simplifies the interference graph by pushing all
243 * trivially-colorable nodes into a stack of nodes to be colored,
244 * removing them from the graph, and rinsing and repeating.
245 *
246 * Returns GL_TRUE if all nodes were removed from the graph. GL_FALSE
247 * means that either spilling will be required, or optimistic coloring
248 * should be applied.
249 */
250 GLboolean
251 ra_simplify(struct ra_graph *g)
252 {
253 GLboolean progress = GL_TRUE;
254 int i;
255
256 while (progress) {
257 progress = GL_FALSE;
258
259 for (i = g->count - 1; i >= 0; i--) {
260 if (g->nodes[i].in_stack)
261 continue;
262
263 if (pq_test(g, i)) {
264 g->stack[g->stack_count] = i;
265 g->stack_count++;
266 g->nodes[i].in_stack = GL_TRUE;
267 progress = GL_TRUE;
268 }
269 }
270 }
271
272 for (i = 0; i < g->count; i++) {
273 if (!g->nodes[i].in_stack)
274 return GL_FALSE;
275 }
276
277 return GL_TRUE;
278 }
279
/**
 * Pops nodes from the stack back into the graph, coloring them with
 * registers as they go.
 *
 * If all nodes were trivially colorable, then this must succeed.  If
 * not (optimistic coloring), then it may return GL_FALSE;
 */
GLboolean
ra_select(struct ra_graph *g)
{
   int i;

   while (g->stack_count != 0) {
      unsigned int r;
      /* Peek at the top of the stack; it is only popped once a
       * register has been found for it.
       */
      int n = g->stack[g->stack_count - 1];
      struct ra_class *c = g->regs->classes[g->nodes[n].class];

      /* Find the lowest-numbered reg which is not used by a member
       * of the graph adjacent to us.
       */
      for (r = 0; r < g->regs->count; r++) {
	 if (!c->regs[r])
	    continue;

	 /* Check if any of our neighbors conflict with this register choice. */
	 for (i = 0; i < g->count; i++) {
	    /* Neighbors still in the stack have no register yet and are
	     * skipped; popped neighbors have a valid nodes[i].reg.
	     */
	    if (g->nodes[n].adjacency[i] &&
		!g->nodes[i].in_stack &&
		g->regs->regs[r].conflicts[g->nodes[i].reg]) {
	       break;
	    }
	 }
	 /* i == g->count means no neighbor conflicted: r is usable. */
	 if (i == g->count)
	    break;
      }
      /* Every register in the class conflicted — optimistic coloring failed. */
      if (r == g->regs->count)
	 return GL_FALSE;

      g->nodes[n].reg = r;
      g->nodes[n].in_stack = GL_FALSE;
      g->stack_count--;
   }

   return GL_TRUE;
}
325
326 /**
327 * Optimistic register coloring: Just push the remaining nodes
328 * on the stack. They'll be colored first in ra_select(), and
329 * if they succeed then the locally-colorable nodes are still
330 * locally-colorable and the rest of the register allocation
331 * will succeed.
332 */
333 void
334 ra_optimistic_color(struct ra_graph *g)
335 {
336 unsigned int i;
337
338 for (i = 0; i < g->count; i++) {
339 if (g->nodes[i].in_stack)
340 continue;
341
342 g->stack[g->stack_count] = i;
343 g->stack_count++;
344 g->nodes[i].in_stack = GL_TRUE;
345 }
346 }
347
348 GLboolean
349 ra_allocate_no_spills(struct ra_graph *g)
350 {
351 if (!ra_simplify(g)) {
352 ra_optimistic_color(g);
353 }
354 return ra_select(g);
355 }
356
/**
 * Returns the physical register assigned to node \p n by ra_select()
 * (~0 if the node was never colored).
 */
unsigned int
ra_get_node_reg(struct ra_graph *g, unsigned int n)
{
   return g->nodes[n].reg;
}