2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
28 /** @file register_allocate.c
30 * Graph-coloring register allocator.
35 #include "main/imports.h"
36 #include "main/macros.h"
37 #include "main/mtypes.h"
38 #include "register_allocate.h"
49 struct ra_class
**classes
;
50 unsigned int class_count
;
57 * p_B in Runeson/Nyström paper.
59 * This is "how many regs are in the set."
64 * q_B,C in Runeson/Nyström paper.
72 unsigned int adjacency_count
;
81 * the variables that need register allocation.
83 struct ra_node
*nodes
;
84 unsigned int count
; /**< count of nodes. */
87 unsigned int stack_count
;
91 ra_alloc_reg_set(unsigned int count
)
96 regs
= talloc_zero(NULL
, struct ra_regs
);
98 regs
->regs
= talloc_zero_array(regs
, struct ra_reg
, count
);
100 for (i
= 0; i
< count
; i
++) {
101 regs
->regs
[i
].conflicts
= talloc_zero_array(regs
->regs
, GLboolean
, count
);
102 regs
->regs
[i
].conflicts
[i
] = GL_TRUE
;
109 ra_add_reg_conflict(struct ra_regs
*regs
, unsigned int r1
, unsigned int r2
)
111 regs
->regs
[r1
].conflicts
[r2
] = GL_TRUE
;
112 regs
->regs
[r2
].conflicts
[r1
] = GL_TRUE
;
116 ra_alloc_reg_class(struct ra_regs
*regs
)
118 struct ra_class
*class;
120 regs
->classes
= talloc_realloc(regs
, regs
->classes
,
122 regs
->class_count
+ 1);
124 class = talloc_zero(regs
, struct ra_class
);
125 regs
->classes
[regs
->class_count
] = class;
127 class->regs
= talloc_zero_array(class, GLboolean
, regs
->count
);
129 return regs
->class_count
++;
133 ra_class_add_reg(struct ra_regs
*regs
, unsigned int c
, unsigned int r
)
135 struct ra_class
*class = regs
->classes
[c
];
137 class->regs
[r
] = GL_TRUE
;
142 * Must be called after all conflicts and register classes have been
143 * set up and before the register set is used for allocation.
146 ra_set_finalize(struct ra_regs
*regs
)
150 for (b
= 0; b
< regs
->class_count
; b
++) {
151 regs
->classes
[b
]->q
= talloc_array(regs
, unsigned int, regs
->class_count
);
154 /* Compute, for each class B and C, how many regs of B an
155 * allocation to C could conflict with.
157 for (b
= 0; b
< regs
->class_count
; b
++) {
158 for (c
= 0; c
< regs
->class_count
; c
++) {
160 int max_conflicts
= 0;
162 for (rc
= 0; rc
< regs
->count
; rc
++) {
166 if (!regs
->classes
[c
]->regs
[rc
])
169 for (rb
= 0; rb
< regs
->count
; rb
++) {
170 if (regs
->classes
[b
]->regs
[rb
] &&
171 regs
->regs
[rb
].conflicts
[rc
])
174 max_conflicts
= MAX2(max_conflicts
, conflicts
);
176 regs
->classes
[b
]->q
[c
] = max_conflicts
;
182 ra_alloc_interference_graph(struct ra_regs
*regs
, unsigned int count
)
187 g
= talloc_zero(regs
, struct ra_graph
);
189 g
->nodes
= talloc_zero_array(g
, struct ra_node
, count
);
192 g
->stack
= talloc_zero_array(g
, unsigned int, count
);
194 for (i
= 0; i
< count
; i
++) {
195 g
->nodes
[i
].adjacency
= talloc_zero_array(g
, GLboolean
, count
);
196 g
->nodes
[i
].adjacency
[i
] = GL_TRUE
;
197 g
->nodes
[i
].reg
= ~0;
204 ra_set_node_class(struct ra_graph
*g
,
205 unsigned int n
, unsigned int class)
207 g
->nodes
[n
].class = class;
211 ra_add_node_interference(struct ra_graph
*g
,
212 unsigned int n1
, unsigned int n2
)
214 if (g
->nodes
[n1
].adjacency
[n2
])
217 g
->nodes
[n1
].adjacency
[n2
] = GL_TRUE
;
218 g
->nodes
[n2
].adjacency_count
++;
219 g
->nodes
[n2
].adjacency
[n1
] = GL_TRUE
;
220 g
->nodes
[n2
].adjacency_count
++;
223 static GLboolean
pq_test(struct ra_graph
*g
, unsigned int n
)
227 int n_class
= g
->nodes
[n
].class;
229 for (j
= 0; j
< g
->count
; j
++) {
230 if (j
== n
|| g
->nodes
[j
].in_stack
)
233 if (g
->nodes
[n
].adjacency
[j
]) {
234 unsigned int j_class
= g
->nodes
[j
].class;
235 q
+= g
->regs
->classes
[n_class
]->q
[j_class
];
239 return q
< g
->regs
->classes
[n_class
]->p
;
243 * Simplifies the interference graph by pushing all
244 * trivially-colorable nodes into a stack of nodes to be colored,
245 * removing them from the graph, and rinsing and repeating.
247 * Returns GL_TRUE if all nodes were removed from the graph. GL_FALSE
248 * means that either spilling will be required, or optimistic coloring
252 ra_simplify(struct ra_graph
*g
)
254 GLboolean progress
= GL_TRUE
;
260 for (i
= g
->count
- 1; i
>= 0; i
--) {
261 if (g
->nodes
[i
].in_stack
)
265 g
->stack
[g
->stack_count
] = i
;
267 g
->nodes
[i
].in_stack
= GL_TRUE
;
273 for (i
= 0; i
< g
->count
; i
++) {
274 if (!g
->nodes
[i
].in_stack
)
282 * Pops nodes from the stack back into the graph, coloring them with
283 * registers as they go.
285 * If all nodes were trivially colorable, then this must succeed. If
286 * not (optimistic coloring), then it may return GL_FALSE;
289 ra_select(struct ra_graph
*g
)
293 while (g
->stack_count
!= 0) {
295 int n
= g
->stack
[g
->stack_count
- 1];
296 struct ra_class
*c
= g
->regs
->classes
[g
->nodes
[n
].class];
298 /* Find the lowest-numbered reg which is not used by a member
299 * of the graph adjacent to us.
301 for (r
= 0; r
< g
->regs
->count
; r
++) {
305 /* Check if any of our neighbors conflict with this register choice. */
306 for (i
= 0; i
< g
->count
; i
++) {
307 if (g
->nodes
[n
].adjacency
[i
] &&
308 !g
->nodes
[i
].in_stack
&&
309 g
->regs
->regs
[r
].conflicts
[g
->nodes
[i
].reg
]) {
316 if (r
== g
->regs
->count
)
320 g
->nodes
[n
].in_stack
= GL_FALSE
;
328 * Optimistic register coloring: Just push the remaining nodes
329 * on the stack. They'll be colored first in ra_select(), and
330 * if they succeed then the locally-colorable nodes are still
331 * locally-colorable and the rest of the register allocation
335 ra_optimistic_color(struct ra_graph
*g
)
339 for (i
= 0; i
< g
->count
; i
++) {
340 if (g
->nodes
[i
].in_stack
)
343 g
->stack
[g
->stack_count
] = i
;
345 g
->nodes
[i
].in_stack
= GL_TRUE
;
350 ra_allocate_no_spills(struct ra_graph
*g
)
352 if (!ra_simplify(g
)) {
353 ra_optimistic_color(g
);
359 ra_get_node_reg(struct ra_graph
*g
, unsigned int n
)
361 return g
->nodes
[n
].reg
;
365 ra_get_spill_benefit(struct ra_graph
*g
, unsigned int n
)
369 int n_class
= g
->nodes
[n
].class;
371 /* Define the benefit of eliminating an interference between n, j
372 * through spilling as q(C, B) / p(C). This is similar to the
373 * "count number of edges" approach of traditional graph coloring,
374 * but takes classes into account.
376 for (j
= 0; j
< g
->count
; j
++) {
377 if (j
!= n
&& g
->nodes
[n
].adjacency
[j
]) {
378 unsigned int j_class
= g
->nodes
[j
].class;
379 benefit
+= ((float)g
->regs
->classes
[n_class
]->q
[j_class
] /
380 g
->regs
->classes
[n_class
]->p
);
389 * Returns a node number to be spilled according to the cost/benefit using
390 * the pq test, or -1 if there are no spillable nodes.
393 ra_get_best_spill_node(struct ra_graph
*g
)
395 unsigned int best_node
= -1;
396 unsigned int best_benefit
= 0.0;
399 for (n
= 0; n
< g
->count
; n
++) {
400 float cost
= g
->nodes
[n
].spill_cost
;
406 benefit
= ra_get_spill_benefit(g
, n
);
408 if (benefit
/ cost
> best_benefit
) {
409 best_benefit
= benefit
/ cost
;
418 * Only nodes with a spill cost set (cost != 0.0) will be considered
419 * for register spilling.
422 ra_set_node_spill_cost(struct ra_graph
*g
, unsigned int n
, float cost
)
424 g
->nodes
[n
].spill_cost
= cost
;