2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
28 /** @file register_allocate.c
30 * Graph-coloring register allocator.
35 #include "main/imports.h"
36 #include "main/macros.h"
37 #include "main/mtypes.h"
38 #include "register_allocate.h"
42 unsigned int *conflict_list
;
43 unsigned int conflict_list_size
;
44 unsigned int num_conflicts
;
51 struct ra_class
**classes
;
52 unsigned int class_count
;
59 * p_B in Runeson/Nyström paper.
61 * This is "how many regs are in the set."
66 * q_B,C in Runeson/Nyström paper.
73 unsigned int *adjacency_list
;
75 unsigned int adjacency_count
;
84 * the variables that need register allocation.
86 struct ra_node
*nodes
;
87 unsigned int count
; /**< count of nodes. */
90 unsigned int stack_count
;
94 ra_alloc_reg_set(unsigned int count
)
99 regs
= talloc_zero(NULL
, struct ra_regs
);
101 regs
->regs
= talloc_zero_array(regs
, struct ra_reg
, count
);
103 for (i
= 0; i
< count
; i
++) {
104 regs
->regs
[i
].conflicts
= talloc_zero_array(regs
->regs
, GLboolean
, count
);
105 regs
->regs
[i
].conflicts
[i
] = GL_TRUE
;
107 regs
->regs
[i
].conflict_list
= talloc_array(regs
->regs
, unsigned int, 4);
108 regs
->regs
[i
].conflict_list_size
= 4;
109 regs
->regs
[i
].conflict_list
[0] = i
;
110 regs
->regs
[i
].num_conflicts
= 1;
117 ra_add_conflict_list(struct ra_regs
*regs
, unsigned int r1
, unsigned int r2
)
119 struct ra_reg
*reg1
= ®s
->regs
[r1
];
121 if (reg1
->conflict_list_size
== reg1
->num_conflicts
) {
122 reg1
->conflict_list_size
*= 2;
123 reg1
->conflict_list
= talloc_realloc(regs
,
126 reg1
->conflict_list_size
);
128 reg1
->conflict_list
[reg1
->num_conflicts
++] = r2
;
129 reg1
->conflicts
[r2
] = GL_TRUE
;
133 ra_add_reg_conflict(struct ra_regs
*regs
, unsigned int r1
, unsigned int r2
)
135 if (!regs
->regs
[r1
].conflicts
[r2
]) {
136 ra_add_conflict_list(regs
, r1
, r2
);
137 ra_add_conflict_list(regs
, r2
, r1
);
142 ra_alloc_reg_class(struct ra_regs
*regs
)
144 struct ra_class
*class;
146 regs
->classes
= talloc_realloc(regs
, regs
->classes
,
148 regs
->class_count
+ 1);
150 class = talloc_zero(regs
, struct ra_class
);
151 regs
->classes
[regs
->class_count
] = class;
153 class->regs
= talloc_zero_array(class, GLboolean
, regs
->count
);
155 return regs
->class_count
++;
159 ra_class_add_reg(struct ra_regs
*regs
, unsigned int c
, unsigned int r
)
161 struct ra_class
*class = regs
->classes
[c
];
163 class->regs
[r
] = GL_TRUE
;
168 * Must be called after all conflicts and register classes have been
169 * set up and before the register set is used for allocation.
172 ra_set_finalize(struct ra_regs
*regs
)
176 for (b
= 0; b
< regs
->class_count
; b
++) {
177 regs
->classes
[b
]->q
= talloc_array(regs
, unsigned int, regs
->class_count
);
180 /* Compute, for each class B and C, how many regs of B an
181 * allocation to C could conflict with.
183 for (b
= 0; b
< regs
->class_count
; b
++) {
184 for (c
= 0; c
< regs
->class_count
; c
++) {
186 int max_conflicts
= 0;
188 for (rc
= 0; rc
< regs
->count
; rc
++) {
192 if (!regs
->classes
[c
]->regs
[rc
])
195 for (i
= 0; i
< regs
->regs
[rc
].num_conflicts
; i
++) {
196 unsigned int rb
= regs
->regs
[rc
].conflict_list
[i
];
197 if (regs
->classes
[b
]->regs
[rb
])
200 max_conflicts
= MAX2(max_conflicts
, conflicts
);
202 regs
->classes
[b
]->q
[c
] = max_conflicts
;
208 ra_add_node_adjacency(struct ra_graph
*g
, unsigned int n1
, unsigned int n2
)
210 g
->nodes
[n1
].adjacency
[n2
] = GL_TRUE
;
211 g
->nodes
[n1
].adjacency_list
[g
->nodes
[n1
].adjacency_count
] = n2
;
212 g
->nodes
[n1
].adjacency_count
++;
216 ra_alloc_interference_graph(struct ra_regs
*regs
, unsigned int count
)
221 g
= talloc_zero(regs
, struct ra_graph
);
223 g
->nodes
= talloc_zero_array(g
, struct ra_node
, count
);
226 g
->stack
= talloc_zero_array(g
, unsigned int, count
);
228 for (i
= 0; i
< count
; i
++) {
229 g
->nodes
[i
].adjacency
= talloc_zero_array(g
, GLboolean
, count
);
230 g
->nodes
[i
].adjacency_list
= talloc_array(g
, unsigned int, count
);
231 g
->nodes
[i
].adjacency_count
= 0;
232 ra_add_node_adjacency(g
, i
, i
);
233 g
->nodes
[i
].reg
= ~0;
240 ra_set_node_class(struct ra_graph
*g
,
241 unsigned int n
, unsigned int class)
243 g
->nodes
[n
].class = class;
247 ra_add_node_interference(struct ra_graph
*g
,
248 unsigned int n1
, unsigned int n2
)
250 if (!g
->nodes
[n1
].adjacency
[n2
]) {
251 ra_add_node_adjacency(g
, n1
, n2
);
252 ra_add_node_adjacency(g
, n2
, n1
);
256 static GLboolean
pq_test(struct ra_graph
*g
, unsigned int n
)
260 int n_class
= g
->nodes
[n
].class;
262 for (j
= 0; j
< g
->nodes
[n
].adjacency_count
; j
++) {
263 unsigned int n2
= g
->nodes
[n
].adjacency_list
[j
];
264 unsigned int n2_class
= g
->nodes
[n2
].class;
266 if (n
!= n2
&& !g
->nodes
[n2
].in_stack
) {
267 q
+= g
->regs
->classes
[n_class
]->q
[n2_class
];
271 return q
< g
->regs
->classes
[n_class
]->p
;
275 * Simplifies the interference graph by pushing all
276 * trivially-colorable nodes into a stack of nodes to be colored,
277 * removing them from the graph, and rinsing and repeating.
279 * Returns GL_TRUE if all nodes were removed from the graph. GL_FALSE
280 * means that either spilling will be required, or optimistic coloring
284 ra_simplify(struct ra_graph
*g
)
286 GLboolean progress
= GL_TRUE
;
292 for (i
= g
->count
- 1; i
>= 0; i
--) {
293 if (g
->nodes
[i
].in_stack
)
297 g
->stack
[g
->stack_count
] = i
;
299 g
->nodes
[i
].in_stack
= GL_TRUE
;
305 for (i
= 0; i
< g
->count
; i
++) {
306 if (!g
->nodes
[i
].in_stack
)
314 * Pops nodes from the stack back into the graph, coloring them with
315 * registers as they go.
317 * If all nodes were trivially colorable, then this must succeed. If
318 * not (optimistic coloring), then it may return GL_FALSE;
321 ra_select(struct ra_graph
*g
)
325 while (g
->stack_count
!= 0) {
327 int n
= g
->stack
[g
->stack_count
- 1];
328 struct ra_class
*c
= g
->regs
->classes
[g
->nodes
[n
].class];
330 /* Find the lowest-numbered reg which is not used by a member
331 * of the graph adjacent to us.
333 for (r
= 0; r
< g
->regs
->count
; r
++) {
337 /* Check if any of our neighbors conflict with this register choice. */
338 for (i
= 0; i
< g
->nodes
[n
].adjacency_count
; i
++) {
339 unsigned int n2
= g
->nodes
[n
].adjacency_list
[i
];
341 if (!g
->nodes
[n2
].in_stack
&&
342 g
->regs
->regs
[r
].conflicts
[g
->nodes
[n2
].reg
]) {
346 if (i
== g
->nodes
[n
].adjacency_count
)
349 if (r
== g
->regs
->count
)
353 g
->nodes
[n
].in_stack
= GL_FALSE
;
361 * Optimistic register coloring: Just push the remaining nodes
362 * on the stack. They'll be colored first in ra_select(), and
363 * if they succeed then the locally-colorable nodes are still
364 * locally-colorable and the rest of the register allocation
368 ra_optimistic_color(struct ra_graph
*g
)
372 for (i
= 0; i
< g
->count
; i
++) {
373 if (g
->nodes
[i
].in_stack
)
376 g
->stack
[g
->stack_count
] = i
;
378 g
->nodes
[i
].in_stack
= GL_TRUE
;
383 ra_allocate_no_spills(struct ra_graph
*g
)
385 if (!ra_simplify(g
)) {
386 ra_optimistic_color(g
);
392 ra_get_node_reg(struct ra_graph
*g
, unsigned int n
)
394 return g
->nodes
[n
].reg
;
398 ra_get_spill_benefit(struct ra_graph
*g
, unsigned int n
)
402 int n_class
= g
->nodes
[n
].class;
404 /* Define the benefit of eliminating an interference between n, n2
405 * through spilling as q(C, B) / p(C). This is similar to the
406 * "count number of edges" approach of traditional graph coloring,
407 * but takes classes into account.
409 for (j
= 0; j
< g
->nodes
[n
].adjacency_count
; j
++) {
410 unsigned int n2
= g
->nodes
[n
].adjacency_list
[j
];
412 unsigned int n2_class
= g
->nodes
[n2
].class;
413 benefit
+= ((float)g
->regs
->classes
[n_class
]->q
[n2_class
] /
414 g
->regs
->classes
[n_class
]->p
);
422 * Returns a node number to be spilled according to the cost/benefit using
423 * the pq test, or -1 if there are no spillable nodes.
426 ra_get_best_spill_node(struct ra_graph
*g
)
428 unsigned int best_node
= -1;
429 unsigned int best_benefit
= 0.0;
432 for (n
= 0; n
< g
->count
; n
++) {
433 float cost
= g
->nodes
[n
].spill_cost
;
439 benefit
= ra_get_spill_benefit(g
, n
);
441 if (benefit
/ cost
> best_benefit
) {
442 best_benefit
= benefit
/ cost
;
451 * Only nodes with a spill cost set (cost != 0.0) will be considered
452 * for register spilling.
455 ra_set_node_spill_cost(struct ra_graph
*g
, unsigned int n
, float cost
)
457 g
->nodes
[n
].spill_cost
= cost
;