 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
26 #include "util/u_dynarray.h"
28 /* Per-register information */
31 BITSET_WORD
*conflicts
;
32 struct util_dynarray conflict_list
;
34 /* Number of conflicts that must be allocated to physical registers.
36 unsigned phys_conflicts
;
38 unsigned node_conflicts
;
40 /* Number of conflicts that can be allocated to either. */
41 unsigned total_conflicts
;
49 unsigned bitset_words
, num_nodes_and_regs
;
50 struct reg_info
*registers
;
52 /* Reusable scratch liveness array */
56 unsigned worklist_start
, worklist_end
;
65 /* Liveness analysis */
67 static void propagate_liveness_instr(gpir_node
*node
, BITSET_WORD
*live
,
71 if (node
->type
== gpir_node_type_store
) {
72 if (node
->op
== gpir_op_store_reg
) {
73 gpir_store_node
*store
= gpir_node_to_store(node
);
74 BITSET_CLEAR(live
, store
->reg
->index
);
79 if (node
->type
== gpir_node_type_load
) {
80 if (node
->op
== gpir_op_load_reg
) {
81 gpir_load_node
*load
= gpir_node_to_load(node
);
82 BITSET_SET(live
, load
->reg
->index
);
87 static bool propagate_liveness_block(gpir_block
*block
, struct regalloc_ctx
*ctx
)
89 for (unsigned i
= 0; i
< 2; i
++) {
90 if (block
->successors
[i
]) {
91 for (unsigned j
= 0; j
< ctx
->bitset_words
; j
++)
92 block
->live_out
[j
] |= block
->successors
[i
]->live_in
[j
];
96 memcpy(ctx
->live
, block
->live_out
, ctx
->bitset_words
* sizeof(BITSET_WORD
));
98 list_for_each_entry_rev(gpir_node
, node
, &block
->node_list
, list
) {
99 propagate_liveness_instr(node
, ctx
->live
, block
->comp
);
102 bool changed
= false;
103 for (unsigned i
= 0; i
< ctx
->bitset_words
; i
++) {
104 changed
|= (block
->live_in
[i
] != ctx
->live
[i
]);
105 block
->live_in
[i
] = ctx
->live
[i
];
110 static void calc_def_block(gpir_block
*block
)
112 list_for_each_entry(gpir_node
, node
, &block
->node_list
, list
) {
113 if (node
->op
== gpir_op_store_reg
) {
114 gpir_store_node
*store
= gpir_node_to_store(node
);
115 BITSET_SET(block
->def_out
, store
->reg
->index
);
120 static void calc_liveness(struct regalloc_ctx
*ctx
)
125 list_for_each_entry_rev(gpir_block
, block
, &ctx
->comp
->block_list
, list
) {
126 changed
|= propagate_liveness_block(block
, ctx
);
130 list_for_each_entry(gpir_block
, block
, &ctx
->comp
->block_list
, list
) {
131 calc_def_block(block
);
137 list_for_each_entry(gpir_block
, block
, &ctx
->comp
->block_list
, list
) {
138 for (unsigned i
= 0; i
< 2; i
++) {
139 gpir_block
*succ
= block
->successors
[i
];
143 for (unsigned j
= 0; j
< ctx
->bitset_words
; j
++) {
144 BITSET_WORD
new = block
->def_out
[j
] & ~succ
->def_out
[j
];
145 changed
|= (new != 0);
146 succ
->def_out
[j
] |= block
->def_out
[j
];
153 /* Interference calculation */
155 static void add_interference(struct regalloc_ctx
*ctx
, unsigned i
, unsigned j
)
160 struct reg_info
*a
= &ctx
->registers
[i
];
161 struct reg_info
*b
= &ctx
->registers
[j
];
163 if (BITSET_TEST(a
->conflicts
, j
))
166 BITSET_SET(a
->conflicts
, j
);
167 BITSET_SET(b
->conflicts
, i
);
169 a
->total_conflicts
++;
170 b
->total_conflicts
++;
171 if (j
< ctx
->comp
->cur_reg
)
176 if (i
< ctx
->comp
->cur_reg
)
181 util_dynarray_append(&a
->conflict_list
, unsigned, j
);
182 util_dynarray_append(&b
->conflict_list
, unsigned, i
);
185 /* Make the register or node "i" intefere with all the other live registers
188 static void add_all_interferences(struct regalloc_ctx
*ctx
,
190 BITSET_WORD
*live_nodes
,
191 BITSET_WORD
*live_regs
)
195 BITSET_FOREACH_SET(live_node
, tmp
, live_nodes
, ctx
->comp
->cur_index
) {
196 add_interference(ctx
, i
,
197 live_node
+ ctx
->comp
->cur_reg
);
201 BITSET_FOREACH_SET(live_reg
, tmp
, ctx
->live
, ctx
->comp
->cur_index
) {
202 add_interference(ctx
, i
, live_reg
);
207 static void print_liveness(struct regalloc_ctx
*ctx
,
208 BITSET_WORD
*live_reg
, BITSET_WORD
*live_val
)
210 if (!(lima_debug
& LIMA_DEBUG_GP
))
215 BITSET_FOREACH_SET(live_idx
, tmp
, live_reg
, ctx
->comp
->cur_reg
) {
216 printf("reg%d ", live_idx
);
218 BITSET_FOREACH_SET(live_idx
, tmp
, live_val
, ctx
->comp
->cur_index
) {
219 printf("%d ", live_idx
);
224 static void calc_interference(struct regalloc_ctx
*ctx
)
226 BITSET_WORD
*live_nodes
=
227 rzalloc_array(ctx
->mem_ctx
, BITSET_WORD
, ctx
->comp
->cur_index
);
229 list_for_each_entry(gpir_block
, block
, &ctx
->comp
->block_list
, list
) {
230 /* Initialize liveness at the end of the block, but exclude values that
231 * definitely aren't defined by the end. This helps out with
232 * partially-defined registers, like:
241 * If we naively propagated liveness backwards, foo would be live from
242 * the beginning of the program, but if we're not inside a loop then
243 * its value is undefined before the first if and we don't have to
244 * consider it live. Mask out registers like foo here.
246 for (unsigned i
= 0; i
< ctx
->bitset_words
; i
++) {
247 ctx
->live
[i
] = block
->live_out
[i
] & block
->def_out
[i
];
250 list_for_each_entry_rev(gpir_node
, node
, &block
->node_list
, list
) {
251 gpir_debug("processing node %d\n", node
->index
);
252 print_liveness(ctx
, ctx
->live
, live_nodes
);
253 if (node
->type
!= gpir_node_type_store
&&
254 node
->type
!= gpir_node_type_branch
) {
255 add_all_interferences(ctx
, node
->index
+ ctx
->comp
->cur_reg
,
256 live_nodes
, ctx
->live
);
259 BITSET_CLEAR(live_nodes
, node
->index
);
260 } else if (node
->op
== gpir_op_store_reg
) {
261 gpir_store_node
*store
= gpir_node_to_store(node
);
262 add_all_interferences(ctx
, store
->reg
->index
,
263 live_nodes
, ctx
->live
);
266 BITSET_CLEAR(ctx
->live
, store
->reg
->index
);
270 if (node
->type
== gpir_node_type_store
) {
271 gpir_store_node
*store
= gpir_node_to_store(node
);
272 BITSET_SET(live_nodes
, store
->child
->index
);
273 } else if (node
->type
== gpir_node_type_alu
) {
274 gpir_alu_node
*alu
= gpir_node_to_alu(node
);
275 for (int i
= 0; i
< alu
->num_child
; i
++)
276 BITSET_SET(live_nodes
, alu
->children
[i
]->index
);
277 } else if (node
->type
== gpir_node_type_branch
) {
278 gpir_branch_node
*branch
= gpir_node_to_branch(node
);
279 BITSET_SET(live_nodes
, branch
->cond
->index
);
280 } else if (node
->op
== gpir_op_load_reg
) {
281 gpir_load_node
*load
= gpir_node_to_load(node
);
282 BITSET_SET(ctx
->live
, load
->reg
->index
);
288 /* Register allocation */
290 static bool can_simplify(struct regalloc_ctx
*ctx
, unsigned i
)
292 struct reg_info
*info
= &ctx
->registers
[i
];
293 if (i
< ctx
->comp
->cur_reg
) {
295 return info
->phys_conflicts
+ info
->node_conflicts
< GPIR_PHYSICAL_REG_NUM
;
297 /* Nodes: if we manage to allocate all of its conflicting physical
298 * registers, they will take up at most GPIR_PHYSICAL_REG_NUM colors, so
299 * we can ignore any more than that.
301 return MIN2(info
->phys_conflicts
, GPIR_PHYSICAL_REG_NUM
) +
302 info
->node_conflicts
< GPIR_PHYSICAL_REG_NUM
+ GPIR_VALUE_REG_NUM
;
306 static void push_stack(struct regalloc_ctx
*ctx
, unsigned i
)
308 ctx
->stack
[ctx
->stack_size
++] = i
;
309 if (i
< ctx
->comp
->cur_reg
)
310 gpir_debug("pushing reg%u\n", i
);
312 gpir_debug("pushing %d\n", i
- ctx
->comp
->cur_reg
);
314 struct reg_info
*info
= &ctx
->registers
[i
];
315 assert(info
->visited
);
317 util_dynarray_foreach(&info
->conflict_list
, unsigned, conflict
) {
318 struct reg_info
*conflict_info
= &ctx
->registers
[*conflict
];
319 if (i
< ctx
->comp
->cur_reg
) {
320 assert(conflict_info
->phys_conflicts
> 0);
321 conflict_info
->phys_conflicts
--;
323 assert(conflict_info
->node_conflicts
> 0);
324 conflict_info
->node_conflicts
--;
326 if (!ctx
->registers
[*conflict
].visited
&& can_simplify(ctx
, *conflict
)) {
327 ctx
->worklist
[ctx
->worklist_end
++] = *conflict
;
328 ctx
->registers
[*conflict
].visited
= true;
/* Graph-coloring allocation in the classic simplify / optimistic-push /
 * select shape: (1) seed the worklist with trivially colorable entities,
 * (2) drain it via push_stack(), (3, elided here) optimistically push the
 * least-conflicted remaining entity when stuck, (4) pop the stack and
 * assign each entity the first color no already-colored neighbor uses.
 * Returns whether coloring succeeded.
 * NOTE(review): fragmentary extract -- several statements (best_reg
 * update, "available = false", success/failure returns) and braces are
 * elided in this chunk.
 * FIX: "&reg" had been HTML-entity-mangled into the single character
 * U+00AE (®) in the util_dynarray_foreach call; restored to "&reg".
 */
333 static bool do_regalloc(struct regalloc_ctx
*ctx
)
335 ctx
->worklist_start
= 0;
336 ctx
->worklist_end
= 0;
339 /* Step 1: find the initially simplifiable registers */
340 for (int i
= 0; i
< ctx
->comp
->cur_reg
+ ctx
->comp
->cur_index
; i
++) {
341 if (can_simplify(ctx
, i
)) {
342 ctx
->worklist
[ctx
->worklist_end
++] = i
;
343 ctx
->registers
[i
].visited
= true;
348 /* Step 2: push onto the stack whatever we can */
349 while (ctx
->worklist_start
!= ctx
->worklist_end
) {
350 push_stack(ctx
, ctx
->worklist
[ctx
->worklist_start
++]);
353 if (ctx
->stack_size
< ctx
->num_nodes_and_regs
) {
354 /* If there are still unsimplifiable nodes left, we need to
355 * optimistically push a node onto the stack. Choose the one with
356 * the smallest number of current neighbors, since that's the most
359 unsigned min_conflicts
= UINT_MAX
;
360 unsigned best_reg
= 0;
361 for (unsigned reg
= 0; reg
< ctx
->num_nodes_and_regs
; reg
++) {
362 struct reg_info
*info
= &ctx
->registers
[reg
];
365 if (info
->phys_conflicts
+ info
->node_conflicts
< min_conflicts
) {
367 min_conflicts
= info
->phys_conflicts
+ info
->node_conflicts
;
370 gpir_debug("optimistic triggered\n");
371 ctx
->registers
[best_reg
].visited
= true;
372 push_stack(ctx
, best_reg
);
378 /* Step 4: pop off the stack and assign colors */
379 for (int i
= ctx
->num_nodes_and_regs
- 1; i
>= 0; i
--) {
380 unsigned idx
= ctx
->stack
[i
];
381 struct reg_info
*reg
= &ctx
->registers
[idx
];
/* Physical registers pick from the physical colors only; nodes may use
 * physical or value colors. */
383 unsigned num_available_regs
;
384 if (idx
< ctx
->comp
->cur_reg
) {
385 num_available_regs
= GPIR_PHYSICAL_REG_NUM
;
387 num_available_regs
= GPIR_VALUE_REG_NUM
+ GPIR_PHYSICAL_REG_NUM
;
/* Rotate the starting color by stack position to spread assignments. */
391 unsigned start
= i
% num_available_regs
;
392 for (unsigned j
= 0; j
< num_available_regs
; j
++) {
393 unsigned candidate
= (j
+ start
) % num_available_regs
;
394 bool available
= true;
/* Reject the candidate if any colored neighbor already uses it. */
395 util_dynarray_foreach(&reg
->conflict_list
, unsigned, conflict_idx
) {
396 struct reg_info
*conflict
= &ctx
->registers
[*conflict_idx
];
397 if (conflict
->assigned_color
>= 0 &&
398 conflict
->assigned_color
== (int) candidate
) {
405 reg
->assigned_color
= candidate
;
413 gpir_error("Failed to allocate registers\n");
421 static void assign_regs(struct regalloc_ctx
*ctx
)
423 list_for_each_entry(gpir_block
, block
, &ctx
->comp
->block_list
, list
) {
424 list_for_each_entry(gpir_node
, node
, &block
->node_list
, list
) {
425 if (node
->index
>= 0) {
427 ctx
->registers
[ctx
->comp
->cur_reg
+ node
->index
].assigned_color
;
430 if (node
->op
== gpir_op_load_reg
) {
431 gpir_load_node
*load
= gpir_node_to_load(node
);
432 unsigned color
= ctx
->registers
[load
->reg
->index
].assigned_color
;
433 load
->index
= color
/ 4;
434 load
->component
= color
% 4;
437 if (node
->op
== gpir_op_store_reg
) {
438 gpir_store_node
*store
= gpir_node_to_store(node
);
439 unsigned color
= ctx
->registers
[store
->reg
->index
].assigned_color
;
440 store
->index
= color
/ 4;
441 store
->component
= color
% 4;
442 node
->value_reg
= color
;
446 block
->live_out_phys
= 0;
450 BITSET_FOREACH_SET(reg_idx
, tmp
, block
->live_out
, ctx
->comp
->cur_reg
) {
451 if (BITSET_TEST(block
->def_out
, reg_idx
)) {
452 block
->live_out_phys
|= (1ull << ctx
->registers
[reg_idx
].assigned_color
);
458 static void regalloc_print_result(gpir_compiler
*comp
)
460 if (!(lima_debug
& LIMA_DEBUG_GP
))
464 printf("======== regalloc ========\n");
465 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
466 list_for_each_entry(gpir_node
, node
, &block
->node_list
, list
) {
467 printf("%03d: %d/%d %s ", index
++, node
->index
, node
->value_reg
,
468 gpir_op_infos
[node
->op
].name
);
469 gpir_node_foreach_pred(node
, dep
) {
470 gpir_node
*pred
= dep
->pred
;
471 printf(" %d/%d", pred
->index
, pred
->value_reg
);
473 if (node
->op
== gpir_op_load_reg
) {
474 gpir_load_node
*load
= gpir_node_to_load(node
);
475 printf(" -/%d", 4 * load
->index
+ load
->component
);
476 printf(" (%d)", load
->reg
->index
);
477 } else if (node
->op
== gpir_op_store_reg
) {
478 gpir_store_node
*store
= gpir_node_to_store(node
);
479 printf(" (%d)", store
->reg
->index
);
483 printf("----------------------------\n");
487 bool gpir_regalloc_prog(gpir_compiler
*comp
)
489 struct regalloc_ctx ctx
;
491 ctx
.mem_ctx
= ralloc_context(NULL
);
492 ctx
.num_nodes_and_regs
= comp
->cur_reg
+ comp
->cur_index
;
493 ctx
.bitset_words
= BITSET_WORDS(ctx
.num_nodes_and_regs
);
494 ctx
.live
= ralloc_array(ctx
.mem_ctx
, BITSET_WORD
, ctx
.bitset_words
);
495 ctx
.worklist
= ralloc_array(ctx
.mem_ctx
, unsigned, ctx
.num_nodes_and_regs
);
496 ctx
.stack
= ralloc_array(ctx
.mem_ctx
, unsigned, ctx
.num_nodes_and_regs
);
499 ctx
.registers
= rzalloc_array(ctx
.mem_ctx
, struct reg_info
, ctx
.num_nodes_and_regs
);
500 for (unsigned i
= 0; i
< ctx
.num_nodes_and_regs
; i
++) {
501 ctx
.registers
[i
].conflicts
= rzalloc_array(ctx
.mem_ctx
, BITSET_WORD
,
503 util_dynarray_init(&ctx
.registers
[i
].conflict_list
, ctx
.mem_ctx
);
506 list_for_each_entry(gpir_block
, block
, &comp
->block_list
, list
) {
507 block
->live_out
= rzalloc_array(ctx
.mem_ctx
, BITSET_WORD
, ctx
.bitset_words
);
508 block
->live_in
= rzalloc_array(ctx
.mem_ctx
, BITSET_WORD
, ctx
.bitset_words
);
509 block
->def_out
= rzalloc_array(ctx
.mem_ctx
, BITSET_WORD
, ctx
.bitset_words
);
513 calc_interference(&ctx
);
514 if (!do_regalloc(&ctx
)) {
515 ralloc_free(ctx
.mem_ctx
);
520 regalloc_print_result(comp
);
521 ralloc_free(ctx
.mem_ctx
);