2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
28 /** @file register_allocate.c
30 * Graph-coloring register allocator.
35 #include "main/imports.h"
36 #include "main/macros.h"
37 #include "main/mtypes.h"
38 #include "register_allocate.h"
42 unsigned int *conflict_list
;
43 unsigned int conflict_list_size
;
44 unsigned int num_conflicts
;
51 struct ra_class
**classes
;
52 unsigned int class_count
;
59 * p_B in Runeson/Nyström paper.
61 * This is "how many regs are in the set."
66 * q_B,C in Runeson/Nyström paper.
73 unsigned int *adjacency_list
;
75 unsigned int adjacency_count
;
84 * the variables that need register allocation.
86 struct ra_node
*nodes
;
87 unsigned int count
; /**< count of nodes. */
90 unsigned int stack_count
;
94 ra_alloc_reg_set(unsigned int count
)
99 regs
= rzalloc(NULL
, struct ra_regs
);
101 regs
->regs
= rzalloc_array(regs
, struct ra_reg
, count
);
103 for (i
= 0; i
< count
; i
++) {
104 regs
->regs
[i
].conflicts
= rzalloc_array(regs
->regs
, GLboolean
, count
);
105 regs
->regs
[i
].conflicts
[i
] = GL_TRUE
;
107 regs
->regs
[i
].conflict_list
= ralloc_array(regs
->regs
, unsigned int, 4);
108 regs
->regs
[i
].conflict_list_size
= 4;
109 regs
->regs
[i
].conflict_list
[0] = i
;
110 regs
->regs
[i
].num_conflicts
= 1;
117 ra_add_conflict_list(struct ra_regs
*regs
, unsigned int r1
, unsigned int r2
)
119 struct ra_reg
*reg1
= ®s
->regs
[r1
];
121 if (reg1
->conflict_list_size
== reg1
->num_conflicts
) {
122 reg1
->conflict_list_size
*= 2;
123 reg1
->conflict_list
= reralloc(regs
->regs
, reg1
->conflict_list
,
124 unsigned int, reg1
->conflict_list_size
);
126 reg1
->conflict_list
[reg1
->num_conflicts
++] = r2
;
127 reg1
->conflicts
[r2
] = GL_TRUE
;
131 ra_add_reg_conflict(struct ra_regs
*regs
, unsigned int r1
, unsigned int r2
)
133 if (!regs
->regs
[r1
].conflicts
[r2
]) {
134 ra_add_conflict_list(regs
, r1
, r2
);
135 ra_add_conflict_list(regs
, r2
, r1
);
140 ra_alloc_reg_class(struct ra_regs
*regs
)
142 struct ra_class
*class;
144 regs
->classes
= reralloc(regs
->regs
, regs
->classes
, struct ra_class
*,
145 regs
->class_count
+ 1);
147 class = rzalloc(regs
, struct ra_class
);
148 regs
->classes
[regs
->class_count
] = class;
150 class->regs
= rzalloc_array(class, GLboolean
, regs
->count
);
152 return regs
->class_count
++;
156 ra_class_add_reg(struct ra_regs
*regs
, unsigned int c
, unsigned int r
)
158 struct ra_class
*class = regs
->classes
[c
];
160 class->regs
[r
] = GL_TRUE
;
165 * Must be called after all conflicts and register classes have been
166 * set up and before the register set is used for allocation.
169 ra_set_finalize(struct ra_regs
*regs
)
173 for (b
= 0; b
< regs
->class_count
; b
++) {
174 regs
->classes
[b
]->q
= ralloc_array(regs
, unsigned int, regs
->class_count
);
177 /* Compute, for each class B and C, how many regs of B an
178 * allocation to C could conflict with.
180 for (b
= 0; b
< regs
->class_count
; b
++) {
181 for (c
= 0; c
< regs
->class_count
; c
++) {
183 int max_conflicts
= 0;
185 for (rc
= 0; rc
< regs
->count
; rc
++) {
189 if (!regs
->classes
[c
]->regs
[rc
])
192 for (i
= 0; i
< regs
->regs
[rc
].num_conflicts
; i
++) {
193 unsigned int rb
= regs
->regs
[rc
].conflict_list
[i
];
194 if (regs
->classes
[b
]->regs
[rb
])
197 max_conflicts
= MAX2(max_conflicts
, conflicts
);
199 regs
->classes
[b
]->q
[c
] = max_conflicts
;
205 ra_add_node_adjacency(struct ra_graph
*g
, unsigned int n1
, unsigned int n2
)
207 g
->nodes
[n1
].adjacency
[n2
] = GL_TRUE
;
208 g
->nodes
[n1
].adjacency_list
[g
->nodes
[n1
].adjacency_count
] = n2
;
209 g
->nodes
[n1
].adjacency_count
++;
213 ra_alloc_interference_graph(struct ra_regs
*regs
, unsigned int count
)
218 g
= rzalloc(regs
, struct ra_graph
);
220 g
->nodes
= rzalloc_array(g
, struct ra_node
, count
);
223 g
->stack
= rzalloc_array(g
, unsigned int, count
);
225 for (i
= 0; i
< count
; i
++) {
226 g
->nodes
[i
].adjacency
= rzalloc_array(g
, GLboolean
, count
);
227 g
->nodes
[i
].adjacency_list
= ralloc_array(g
, unsigned int, count
);
228 g
->nodes
[i
].adjacency_count
= 0;
229 ra_add_node_adjacency(g
, i
, i
);
230 g
->nodes
[i
].reg
= ~0;
237 ra_set_node_class(struct ra_graph
*g
,
238 unsigned int n
, unsigned int class)
240 g
->nodes
[n
].class = class;
244 ra_add_node_interference(struct ra_graph
*g
,
245 unsigned int n1
, unsigned int n2
)
247 if (!g
->nodes
[n1
].adjacency
[n2
]) {
248 ra_add_node_adjacency(g
, n1
, n2
);
249 ra_add_node_adjacency(g
, n2
, n1
);
253 static GLboolean
pq_test(struct ra_graph
*g
, unsigned int n
)
257 int n_class
= g
->nodes
[n
].class;
259 for (j
= 0; j
< g
->nodes
[n
].adjacency_count
; j
++) {
260 unsigned int n2
= g
->nodes
[n
].adjacency_list
[j
];
261 unsigned int n2_class
= g
->nodes
[n2
].class;
263 if (n
!= n2
&& !g
->nodes
[n2
].in_stack
) {
264 q
+= g
->regs
->classes
[n_class
]->q
[n2_class
];
268 return q
< g
->regs
->classes
[n_class
]->p
;
272 * Simplifies the interference graph by pushing all
273 * trivially-colorable nodes into a stack of nodes to be colored,
274 * removing them from the graph, and rinsing and repeating.
276 * Returns GL_TRUE if all nodes were removed from the graph. GL_FALSE
277 * means that either spilling will be required, or optimistic coloring
281 ra_simplify(struct ra_graph
*g
)
283 GLboolean progress
= GL_TRUE
;
289 for (i
= g
->count
- 1; i
>= 0; i
--) {
290 if (g
->nodes
[i
].in_stack
)
294 g
->stack
[g
->stack_count
] = i
;
296 g
->nodes
[i
].in_stack
= GL_TRUE
;
302 for (i
= 0; i
< g
->count
; i
++) {
303 if (!g
->nodes
[i
].in_stack
)
311 * Pops nodes from the stack back into the graph, coloring them with
312 * registers as they go.
314 * If all nodes were trivially colorable, then this must succeed. If
315 * not (optimistic coloring), then it may return GL_FALSE;
318 ra_select(struct ra_graph
*g
)
322 while (g
->stack_count
!= 0) {
324 int n
= g
->stack
[g
->stack_count
- 1];
325 struct ra_class
*c
= g
->regs
->classes
[g
->nodes
[n
].class];
327 /* Find the lowest-numbered reg which is not used by a member
328 * of the graph adjacent to us.
330 for (r
= 0; r
< g
->regs
->count
; r
++) {
334 /* Check if any of our neighbors conflict with this register choice. */
335 for (i
= 0; i
< g
->nodes
[n
].adjacency_count
; i
++) {
336 unsigned int n2
= g
->nodes
[n
].adjacency_list
[i
];
338 if (!g
->nodes
[n2
].in_stack
&&
339 g
->regs
->regs
[r
].conflicts
[g
->nodes
[n2
].reg
]) {
343 if (i
== g
->nodes
[n
].adjacency_count
)
346 if (r
== g
->regs
->count
)
350 g
->nodes
[n
].in_stack
= GL_FALSE
;
358 * Optimistic register coloring: Just push the remaining nodes
359 * on the stack. They'll be colored first in ra_select(), and
360 * if they succeed then the locally-colorable nodes are still
361 * locally-colorable and the rest of the register allocation
365 ra_optimistic_color(struct ra_graph
*g
)
369 for (i
= 0; i
< g
->count
; i
++) {
370 if (g
->nodes
[i
].in_stack
)
373 g
->stack
[g
->stack_count
] = i
;
375 g
->nodes
[i
].in_stack
= GL_TRUE
;
380 ra_allocate_no_spills(struct ra_graph
*g
)
382 if (!ra_simplify(g
)) {
383 ra_optimistic_color(g
);
389 ra_get_node_reg(struct ra_graph
*g
, unsigned int n
)
391 return g
->nodes
[n
].reg
;
395 ra_get_spill_benefit(struct ra_graph
*g
, unsigned int n
)
399 int n_class
= g
->nodes
[n
].class;
401 /* Define the benefit of eliminating an interference between n, n2
402 * through spilling as q(C, B) / p(C). This is similar to the
403 * "count number of edges" approach of traditional graph coloring,
404 * but takes classes into account.
406 for (j
= 0; j
< g
->nodes
[n
].adjacency_count
; j
++) {
407 unsigned int n2
= g
->nodes
[n
].adjacency_list
[j
];
409 unsigned int n2_class
= g
->nodes
[n2
].class;
410 benefit
+= ((float)g
->regs
->classes
[n_class
]->q
[n2_class
] /
411 g
->regs
->classes
[n_class
]->p
);
419 * Returns a node number to be spilled according to the cost/benefit using
420 * the pq test, or -1 if there are no spillable nodes.
423 ra_get_best_spill_node(struct ra_graph
*g
)
425 unsigned int best_node
= -1;
426 unsigned int best_benefit
= 0.0;
429 for (n
= 0; n
< g
->count
; n
++) {
430 float cost
= g
->nodes
[n
].spill_cost
;
436 benefit
= ra_get_spill_benefit(g
, n
);
438 if (benefit
/ cost
> best_benefit
) {
439 best_benefit
= benefit
/ cost
;
448 * Only nodes with a spill cost set (cost != 0.0) will be considered
449 * for register spilling.
452 ra_set_node_spill_cost(struct ra_graph
*g
, unsigned int n
, float cost
)
454 g
->nodes
[n
].spill_cost
= cost
;