/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */
/** @file register_allocate.c
 *
 * Graph-coloring register allocator.
 */
35 #include "main/imports.h"
36 #include "main/macros.h"
37 #include "main/mtypes.h"
38 #include "register_allocate.h"
49 struct ra_class
**classes
;
50 unsigned int class_count
;
57 * p_B in Runeson/Nyström paper.
59 * This is "how many regs are in the set."
64 * q_B,C in Runeson/Nyström paper.
72 unsigned int adjacency_count
;
80 * the variables that need register allocation.
82 struct ra_node
*nodes
;
83 unsigned int count
; /**< count of nodes. */
86 unsigned int stack_count
;
90 ra_alloc_reg_set(unsigned int count
)
95 regs
= talloc_zero(NULL
, struct ra_regs
);
97 regs
->regs
= talloc_zero_array(regs
, struct ra_reg
, count
);
99 for (i
= 0; i
< count
; i
++) {
100 regs
->regs
[i
].conflicts
= talloc_zero_array(regs
->regs
, GLboolean
, count
);
101 regs
->regs
[i
].conflicts
[i
] = GL_TRUE
;
108 ra_add_reg_conflict(struct ra_regs
*regs
, unsigned int r1
, unsigned int r2
)
110 regs
->regs
[r1
].conflicts
[r2
] = GL_TRUE
;
111 regs
->regs
[r2
].conflicts
[r1
] = GL_TRUE
;
115 ra_alloc_reg_class(struct ra_regs
*regs
)
117 struct ra_class
*class;
119 regs
->classes
= talloc_realloc(regs
, regs
->classes
,
121 regs
->class_count
+ 1);
123 class = talloc_zero(regs
, struct ra_class
);
124 regs
->classes
[regs
->class_count
] = class;
126 class->regs
= talloc_zero_array(class, GLboolean
, regs
->count
);
128 return regs
->class_count
++;
132 ra_class_add_reg(struct ra_regs
*regs
, unsigned int c
, unsigned int r
)
134 struct ra_class
*class = regs
->classes
[c
];
136 class->regs
[r
] = GL_TRUE
;
141 * Must be called after all conflicts and register classes have been
142 * set up and before the register set is used for allocation.
145 ra_set_finalize(struct ra_regs
*regs
)
149 for (b
= 0; b
< regs
->class_count
; b
++) {
150 regs
->classes
[b
]->q
= talloc_array(regs
, unsigned int, regs
->class_count
);
153 /* Compute, for each class B and C, how many regs of B an
154 * allocation to C could conflict with.
156 for (b
= 0; b
< regs
->class_count
; b
++) {
157 for (c
= 0; c
< regs
->class_count
; c
++) {
159 int max_conflicts
= 0;
161 for (rc
= 0; rc
< regs
->count
; rc
++) {
165 if (!regs
->classes
[c
]->regs
[rc
])
168 for (rb
= 0; rb
< regs
->count
; rb
++) {
169 if (regs
->classes
[b
]->regs
[rb
] &&
170 regs
->regs
[rb
].conflicts
[rc
])
173 max_conflicts
= MAX2(max_conflicts
, conflicts
);
175 regs
->classes
[b
]->q
[c
] = max_conflicts
;
181 ra_alloc_interference_graph(struct ra_regs
*regs
, unsigned int count
)
186 g
= talloc_zero(regs
, struct ra_graph
);
188 g
->nodes
= talloc_zero_array(g
, struct ra_node
, count
);
191 g
->stack
= talloc_zero_array(g
, unsigned int, count
);
193 for (i
= 0; i
< count
; i
++) {
194 g
->nodes
[i
].adjacency
= talloc_zero_array(g
, GLboolean
, count
);
195 g
->nodes
[i
].adjacency
[i
] = GL_TRUE
;
196 g
->nodes
[i
].reg
= ~0;
203 ra_set_node_class(struct ra_graph
*g
,
204 unsigned int n
, unsigned int class)
206 g
->nodes
[n
].class = class;
210 ra_add_node_interference(struct ra_graph
*g
,
211 unsigned int n1
, unsigned int n2
)
213 if (g
->nodes
[n1
].adjacency
[n2
])
216 g
->nodes
[n1
].adjacency
[n2
] = GL_TRUE
;
217 g
->nodes
[n2
].adjacency_count
++;
218 g
->nodes
[n2
].adjacency
[n1
] = GL_TRUE
;
219 g
->nodes
[n2
].adjacency_count
++;
222 static GLboolean
pq_test(struct ra_graph
*g
, unsigned int n
)
226 int n_class
= g
->nodes
[n
].class;
228 for (j
= 0; j
< g
->count
; j
++) {
229 if (j
== n
|| g
->nodes
[j
].in_stack
)
232 if (g
->nodes
[n
].adjacency
[j
]) {
233 unsigned int j_class
= g
->nodes
[j
].class;
234 q
+= g
->regs
->classes
[n_class
]->q
[j_class
];
238 return q
< g
->regs
->classes
[n_class
]->p
;
242 * Simplifies the interference graph by pushing all
243 * trivially-colorable nodes into a stack of nodes to be colored,
244 * removing them from the graph, and rinsing and repeating.
246 * Returns GL_TRUE if all nodes were removed from the graph. GL_FALSE
247 * means that either spilling will be required, or optimistic coloring
251 ra_simplify(struct ra_graph
*g
)
253 GLboolean progress
= GL_TRUE
;
259 for (i
= g
->count
- 1; i
>= 0; i
--) {
260 if (g
->nodes
[i
].in_stack
)
264 g
->stack
[g
->stack_count
] = i
;
266 g
->nodes
[i
].in_stack
= GL_TRUE
;
272 for (i
= 0; i
< g
->count
; i
++) {
273 if (!g
->nodes
[i
].in_stack
)
281 * Pops nodes from the stack back into the graph, coloring them with
282 * registers as they go.
284 * If all nodes were trivially colorable, then this must succeed. If
285 * not (optimistic coloring), then it may return GL_FALSE;
288 ra_select(struct ra_graph
*g
)
292 while (g
->stack_count
!= 0) {
294 int n
= g
->stack
[g
->stack_count
- 1];
295 struct ra_class
*c
= g
->regs
->classes
[g
->nodes
[n
].class];
297 /* Find the lowest-numbered reg which is not used by a member
298 * of the graph adjacent to us.
300 for (r
= 0; r
< g
->regs
->count
; r
++) {
304 /* Check if any of our neighbors conflict with this register choice. */
305 for (i
= 0; i
< g
->count
; i
++) {
306 if (g
->nodes
[n
].adjacency
[i
] &&
307 !g
->nodes
[i
].in_stack
&&
308 g
->regs
->regs
[r
].conflicts
[g
->nodes
[i
].reg
]) {
315 if (r
== g
->regs
->count
)
319 g
->nodes
[n
].in_stack
= GL_FALSE
;
327 * Optimistic register coloring: Just push the remaining nodes
328 * on the stack. They'll be colored first in ra_select(), and
329 * if they succeed then the locally-colorable nodes are still
330 * locally-colorable and the rest of the register allocation
334 ra_optimistic_color(struct ra_graph
*g
)
338 for (i
= 0; i
< g
->count
; i
++) {
339 if (g
->nodes
[i
].in_stack
)
342 g
->stack
[g
->stack_count
] = i
;
344 g
->nodes
[i
].in_stack
= GL_TRUE
;
349 ra_allocate_no_spills(struct ra_graph
*g
)
351 if (!ra_simplify(g
)) {
352 ra_optimistic_color(g
);
358 ra_get_node_reg(struct ra_graph
*g
, unsigned int n
)
360 return g
->nodes
[n
].reg
;