2 * Copyright (c) 2017 Lima Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 #include "util/ralloc.h"
26 #include "util/register_allocate.h"
27 #include "util/u_debug.h"
30 #include "lima_context.h"
32 #define PPIR_FULL_REG_NUM 6
34 #define PPIR_VEC1_REG_NUM (PPIR_FULL_REG_NUM * 4) /* x, y, z, w */
35 #define PPIR_VEC2_REG_NUM (PPIR_FULL_REG_NUM * 3) /* xy, yz, zw */
36 #define PPIR_VEC3_REG_NUM (PPIR_FULL_REG_NUM * 2) /* xyz, yzw */
37 #define PPIR_VEC4_REG_NUM PPIR_FULL_REG_NUM /* xyzw */
38 #define PPIR_HEAD_VEC1_REG_NUM PPIR_FULL_REG_NUM /* x */
39 #define PPIR_HEAD_VEC2_REG_NUM PPIR_FULL_REG_NUM /* xy */
40 #define PPIR_HEAD_VEC3_REG_NUM PPIR_FULL_REG_NUM /* xyz */
41 #define PPIR_HEAD_VEC4_REG_NUM PPIR_FULL_REG_NUM /* xyzw */
43 #define PPIR_VEC1_REG_BASE 0
44 #define PPIR_VEC2_REG_BASE (PPIR_VEC1_REG_BASE + PPIR_VEC1_REG_NUM)
45 #define PPIR_VEC3_REG_BASE (PPIR_VEC2_REG_BASE + PPIR_VEC2_REG_NUM)
46 #define PPIR_VEC4_REG_BASE (PPIR_VEC3_REG_BASE + PPIR_VEC3_REG_NUM)
47 #define PPIR_HEAD_VEC1_REG_BASE (PPIR_VEC4_REG_BASE + PPIR_VEC4_REG_NUM)
48 #define PPIR_HEAD_VEC2_REG_BASE (PPIR_HEAD_VEC1_REG_BASE + PPIR_HEAD_VEC1_REG_NUM)
49 #define PPIR_HEAD_VEC3_REG_BASE (PPIR_HEAD_VEC2_REG_BASE + PPIR_HEAD_VEC2_REG_NUM)
50 #define PPIR_HEAD_VEC4_REG_BASE (PPIR_HEAD_VEC3_REG_BASE + PPIR_HEAD_VEC3_REG_NUM)
51 #define PPIR_REG_COUNT (PPIR_HEAD_VEC4_REG_BASE + PPIR_HEAD_VEC4_REG_NUM)
53 enum ppir_ra_reg_class
{
54 ppir_ra_reg_class_vec1
,
55 ppir_ra_reg_class_vec2
,
56 ppir_ra_reg_class_vec3
,
57 ppir_ra_reg_class_vec4
,
59 /* 4 reg class for load/store instr regs:
60 * load/store instr has no swizzle field, so the (virtual) register
61 * must be allocated at the beginning of a (physical) register,
63 ppir_ra_reg_class_head_vec1
,
64 ppir_ra_reg_class_head_vec2
,
65 ppir_ra_reg_class_head_vec3
,
66 ppir_ra_reg_class_head_vec4
,
68 ppir_ra_reg_class_num
,
71 static const int ppir_ra_reg_base
[ppir_ra_reg_class_num
+ 1] = {
72 [ppir_ra_reg_class_vec1
] = PPIR_VEC1_REG_BASE
,
73 [ppir_ra_reg_class_vec2
] = PPIR_VEC2_REG_BASE
,
74 [ppir_ra_reg_class_vec3
] = PPIR_VEC3_REG_BASE
,
75 [ppir_ra_reg_class_vec4
] = PPIR_VEC4_REG_BASE
,
76 [ppir_ra_reg_class_head_vec1
] = PPIR_HEAD_VEC1_REG_BASE
,
77 [ppir_ra_reg_class_head_vec2
] = PPIR_HEAD_VEC2_REG_BASE
,
78 [ppir_ra_reg_class_head_vec3
] = PPIR_HEAD_VEC3_REG_BASE
,
79 [ppir_ra_reg_class_head_vec4
] = PPIR_HEAD_VEC4_REG_BASE
,
80 [ppir_ra_reg_class_num
] = PPIR_REG_COUNT
,
84 ppir_ra_reg_q_values
[ppir_ra_reg_class_num
] = {
85 (unsigned int []) {1, 2, 3, 4, 1, 2, 3, 4},
86 (unsigned int []) {2, 3, 3, 3, 1, 2, 3, 3},
87 (unsigned int []) {2, 2, 2, 2, 1, 2, 2, 2},
88 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
89 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
90 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
91 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
92 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
95 struct ra_regs
*ppir_regalloc_init(void *mem_ctx
)
97 struct ra_regs
*ret
= ra_alloc_reg_set(mem_ctx
, PPIR_REG_COUNT
, false);
101 /* (x, y, z, w) (xy, yz, zw) (xyz, yzw) (xyzw) (x) (xy) (xyz) (xyzw) */
102 static const int class_reg_num
[ppir_ra_reg_class_num
] = {
103 4, 3, 2, 1, 1, 1, 1, 1,
105 /* base reg (x, y, z, w) confliction with other regs */
106 for (int h
= 0; h
< 4; h
++) {
107 int base_reg_mask
= 1 << h
;
108 for (int i
= 1; i
< ppir_ra_reg_class_num
; i
++) {
109 int class_reg_base_mask
= (1 << ((i
% 4) + 1)) - 1;
110 for (int j
= 0; j
< class_reg_num
[i
]; j
++) {
111 if (base_reg_mask
& (class_reg_base_mask
<< j
)) {
112 for (int k
= 0; k
< PPIR_FULL_REG_NUM
; k
++) {
113 ra_add_reg_conflict(ret
, k
* 4 + h
,
114 ppir_ra_reg_base
[i
] + k
* class_reg_num
[i
] + j
);
120 /* build all other confliction by the base reg confliction */
121 for (int i
= 0; i
< PPIR_VEC1_REG_NUM
; i
++)
122 ra_make_reg_conflicts_transitive(ret
, i
);
124 for (int i
= 0; i
< ppir_ra_reg_class_num
; i
++)
125 ra_alloc_reg_class(ret
);
128 for (int i
= 0; i
< ppir_ra_reg_class_num
; i
++) {
129 while (reg_index
< ppir_ra_reg_base
[i
+ 1])
130 ra_class_add_reg(ret
, i
, reg_index
++);
133 ra_set_finalize(ret
, ppir_ra_reg_q_values
);
137 static void ppir_regalloc_update_reglist_ssa(ppir_compiler
*comp
)
139 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
140 list_for_each_entry(ppir_node
, node
, &block
->node_list
, list
) {
141 if (node
->op
== ppir_op_store_color
)
144 if (!node
->instr
|| node
->op
== ppir_op_const
)
147 ppir_dest
*dest
= ppir_node_get_dest(node
);
149 ppir_reg
*reg
= NULL
;
151 if (dest
->type
== ppir_target_ssa
) {
153 list_addtail(®
->list
, &comp
->reg_list
);
160 static int get_phy_reg_index(int reg
)
164 for (i
= 0; i
< ppir_ra_reg_class_num
; i
++) {
165 if (reg
< ppir_ra_reg_base
[i
+ 1]) {
166 reg
-= ppir_ra_reg_base
[i
];
171 if (i
< ppir_ra_reg_class_head_vec1
)
172 return reg
/ (4 - i
) * 4 + reg
% (4 - i
);
177 static void ppir_regalloc_print_result(ppir_compiler
*comp
)
179 printf("======ppir regalloc result======\n");
180 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
181 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
182 printf("%03d:", instr
->index
);
183 for (int i
= 0; i
< PPIR_INSTR_SLOT_NUM
; i
++) {
184 ppir_node
*node
= instr
->slots
[i
];
188 printf(" (%d|", node
->index
);
190 ppir_dest
*dest
= ppir_node_get_dest(node
);
192 printf("%d", ppir_target_get_dest_reg_index(dest
));
196 for (int i
= 0; i
< ppir_node_get_src_num(node
); i
++) {
199 printf("%d", ppir_target_get_src_reg_index(ppir_node_get_src(node
, i
)));
207 printf("--------------------------\n");
210 static bool create_new_instr_after(ppir_block
*block
, ppir_instr
*ref
,
213 ppir_instr
*newinstr
= ppir_instr_create(block
);
214 if (unlikely(!newinstr
))
217 list_del(&newinstr
->list
);
218 list_add(&newinstr
->list
, &ref
->list
);
220 if (!ppir_instr_insert_node(newinstr
, node
))
223 list_for_each_entry_from(ppir_instr
, instr
, ref
, &block
->instr_list
, list
) {
226 newinstr
->seq
= ref
->seq
+1;
227 newinstr
->scheduled
= true;
231 static bool create_new_instr_before(ppir_block
*block
, ppir_instr
*ref
,
234 ppir_instr
*newinstr
= ppir_instr_create(block
);
235 if (unlikely(!newinstr
))
238 list_del(&newinstr
->list
);
239 list_addtail(&newinstr
->list
, &ref
->list
);
241 if (!ppir_instr_insert_node(newinstr
, node
))
244 list_for_each_entry_from(ppir_instr
, instr
, ref
, &block
->instr_list
, list
) {
247 newinstr
->seq
= ref
->seq
-1;
248 newinstr
->scheduled
= true;
252 static ppir_alu_node
* ppir_update_spilled_src(ppir_compiler
*comp
,
254 ppir_node
*node
, ppir_src
*src
,
255 ppir_alu_node
*move_alu
)
257 /* alu nodes may have multiple references to the same value.
258 * try to avoid unnecessary loads for the same alu node by
259 * saving the node resulting from the temporary load */
263 /* alloc new node to load value */
264 ppir_node
*load_node
= ppir_node_create(block
, ppir_op_load_temp
, -1, 0);
267 list_addtail(&load_node
->list
, &node
->list
);
270 ppir_load_node
*load
= ppir_node_to_load(load_node
);
272 load
->index
= -comp
->prog
->stack_size
; /* index sizes are negative */
273 load
->num_components
= 4;
275 ppir_dest
*ld_dest
= &load
->dest
;
276 ld_dest
->type
= ppir_target_pipeline
;
277 ld_dest
->pipeline
= ppir_pipeline_reg_uniform
;
278 ld_dest
->write_mask
= 0xf;
280 create_new_instr_before(block
, node
->instr
, load_node
);
282 /* Create move node */
283 ppir_node
*move_node
= ppir_node_create(block
, ppir_op_mov
, -1 , 0);
284 if (unlikely(!move_node
))
286 list_addtail(&move_node
->list
, &node
->list
);
288 move_alu
= ppir_node_to_alu(move_node
);
290 move_alu
->num_src
= 1;
291 move_alu
->src
->type
= ppir_target_pipeline
;
292 move_alu
->src
->pipeline
= ppir_pipeline_reg_uniform
;
293 for (int i
= 0; i
< 4; i
++)
294 move_alu
->src
->swizzle
[i
] = i
;
296 ppir_dest
*alu_dest
= &move_alu
->dest
;
297 alu_dest
->type
= ppir_target_ssa
;
298 alu_dest
->ssa
.num_components
= 4;
299 alu_dest
->ssa
.live_in
= INT_MAX
;
300 alu_dest
->ssa
.live_out
= 0;
301 alu_dest
->write_mask
= 0xf;
303 list_addtail(&alu_dest
->ssa
.list
, &comp
->reg_list
);
305 if (!ppir_instr_insert_node(load_node
->instr
, move_node
))
308 /* insert the new node as predecessor */
309 ppir_node_foreach_pred_safe(node
, dep
) {
310 ppir_node
*pred
= dep
->pred
;
311 ppir_node_remove_dep(dep
);
312 ppir_node_add_dep(load_node
, pred
);
314 ppir_node_add_dep(node
, move_node
);
315 ppir_node_add_dep(move_node
, load_node
);
318 /* switch node src to use the new ssa instead */
319 src
->type
= ppir_target_ssa
;
320 src
->ssa
= &move_alu
->dest
.ssa
;
325 static ppir_reg
*create_reg(ppir_compiler
*comp
, int num_components
)
327 ppir_reg
*r
= rzalloc(comp
, ppir_reg
);
331 r
->num_components
= num_components
;
332 r
->live_in
= INT_MAX
;
335 list_addtail(&r
->list
, &comp
->reg_list
);
340 static bool ppir_update_spilled_dest(ppir_compiler
*comp
, ppir_block
*block
,
341 ppir_node
*node
, ppir_dest
*dest
)
343 assert(dest
!= NULL
);
344 ppir_reg
*reg
= NULL
;
345 if (dest
->type
== ppir_target_register
) {
347 reg
->num_components
= 4;
351 reg
= create_reg(comp
, 4);
353 list_del(&dest
->ssa
.list
);
356 /* alloc new node to load value */
357 ppir_node
*load_node
= ppir_node_create(block
, ppir_op_load_temp
, -1, 0);
360 list_addtail(&load_node
->list
, &node
->list
);
363 ppir_load_node
*load
= ppir_node_to_load(load_node
);
365 load
->index
= -comp
->prog
->stack_size
; /* index sizes are negative */
366 load
->num_components
= 4;
368 load
->dest
.type
= ppir_target_pipeline
;
369 load
->dest
.pipeline
= ppir_pipeline_reg_uniform
;
370 load
->dest
.write_mask
= 0xf;
372 create_new_instr_before(block
, node
->instr
, load_node
);
374 /* Create move node */
375 ppir_node
*move_node
= ppir_node_create(block
, ppir_op_mov
, -1 , 0);
376 if (unlikely(!move_node
))
378 list_addtail(&move_node
->list
, &node
->list
);
380 ppir_alu_node
*move_alu
= ppir_node_to_alu(move_node
);
382 move_alu
->num_src
= 1;
383 move_alu
->src
->type
= ppir_target_pipeline
;
384 move_alu
->src
->pipeline
= ppir_pipeline_reg_uniform
;
385 for (int i
= 0; i
< 4; i
++)
386 move_alu
->src
->swizzle
[i
] = i
;
388 move_alu
->dest
.type
= ppir_target_register
;
389 move_alu
->dest
.reg
= reg
;
390 move_alu
->dest
.write_mask
= 0x0f;
392 if (!ppir_instr_insert_node(load_node
->instr
, move_node
))
395 ppir_node_foreach_pred_safe(node
, dep
) {
396 ppir_node
*pred
= dep
->pred
;
397 ppir_node_remove_dep(dep
);
398 ppir_node_add_dep(load_node
, pred
);
400 ppir_node_add_dep(node
, move_node
);
401 ppir_node_add_dep(move_node
, load_node
);
403 dest
->type
= ppir_target_register
;
406 /* alloc new node to store value */
407 ppir_node
*store_node
= ppir_node_create(block
, ppir_op_store_temp
, -1, 0);
410 list_addtail(&store_node
->list
, &node
->list
);
413 ppir_store_node
*store
= ppir_node_to_store(store_node
);
415 store
->index
= -comp
->prog
->stack_size
; /* index sizes are negative */
416 store
->num_components
= 4;
418 store
->src
.type
= ppir_target_register
;
419 store
->src
.reg
= dest
->reg
;
421 /* insert the new node as successor */
422 ppir_node_foreach_succ_safe(node
, dep
) {
423 ppir_node
*succ
= dep
->succ
;
424 ppir_node_remove_dep(dep
);
425 ppir_node_add_dep(succ
, store_node
);
427 ppir_node_add_dep(store_node
, node
);
429 create_new_instr_after(block
, node
->instr
, store_node
);
434 static bool ppir_regalloc_spill_reg(ppir_compiler
*comp
, ppir_reg
*chosen
)
436 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
437 list_for_each_entry(ppir_node
, node
, &block
->node_list
, list
) {
439 ppir_dest
*dest
= ppir_node_get_dest(node
);
440 ppir_reg
*reg
= NULL
;
442 reg
= ppir_dest_get_reg(dest
);
444 ppir_update_spilled_dest(comp
, block
, node
, dest
);
447 switch (node
->type
) {
448 case ppir_node_type_alu
:
450 /* alu nodes may have multiple references to the same value.
451 * try to avoid unnecessary loads for the same alu node by
452 * saving the node resulting from the temporary load */
453 ppir_alu_node
*move_alu
= NULL
;
454 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
455 for (int i
= 0; i
< alu
->num_src
; i
++) {
456 reg
= ppir_src_get_reg(alu
->src
+ i
);
458 move_alu
= ppir_update_spilled_src(comp
, block
, node
,
459 alu
->src
+ i
, move_alu
);
466 for (int i
= 0; i
< ppir_node_get_src_num(node
); i
++) {
467 ppir_src
*src
= ppir_node_get_src(node
, i
);
468 reg
= ppir_src_get_reg(src
);
470 ppir_update_spilled_src(comp
, block
, node
, src
, NULL
);
482 static ppir_reg
*ppir_regalloc_choose_spill_node(ppir_compiler
*comp
,
486 ppir_reg
*chosen
= NULL
;
488 list_for_each_entry(ppir_reg
, reg
, &comp
->reg_list
, list
) {
489 if (reg
->spilled
|| reg
->live_out
== INT_MAX
) {
490 /* not considered for spilling */
491 ra_set_node_spill_cost(g
, i
++, 0.0f
);
495 /* It is beneficial to spill registers with higher component number,
496 * so increase the cost of spilling registers with few components */
497 float spill_cost
= 4.0f
/ (float)reg
->num_components
;
498 ra_set_node_spill_cost(g
, i
++, spill_cost
);
501 int r
= ra_get_best_spill_node(g
);
506 list_for_each_entry(ppir_reg
, reg
, &comp
->reg_list
, list
) {
513 chosen
->spilled
= true;
518 static void ppir_regalloc_reset_liveness_info(ppir_compiler
*comp
)
520 int bitset_words
= BITSET_WORDS(list_length(&comp
->reg_list
));
523 list_for_each_entry(ppir_reg
, reg
, &comp
->reg_list
, list
) {
524 reg
->live_in
= INT_MAX
;
526 reg
->regalloc_index
= idx
++;
529 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
531 ralloc_free(block
->def
);
532 block
->def
= rzalloc_array(comp
, BITSET_WORD
, bitset_words
);
535 ralloc_free(block
->use
);
536 block
->use
= rzalloc_array(comp
, BITSET_WORD
, bitset_words
);
539 ralloc_free(block
->live_in
);
540 block
->live_in
= rzalloc_array(comp
, BITSET_WORD
, bitset_words
);
543 ralloc_free(block
->live_out
);
544 block
->live_out
= rzalloc_array(comp
, BITSET_WORD
, bitset_words
);
548 int lima_ppir_force_spilling
= 0;
550 static bool ppir_regalloc_prog_try(ppir_compiler
*comp
, bool *spilled
)
552 ppir_regalloc_reset_liveness_info(comp
);
554 ppir_liveness_analysis(comp
);
556 struct ra_graph
*g
= ra_alloc_interference_graph(
557 comp
->ra
, list_length(&comp
->reg_list
));
560 list_for_each_entry(ppir_reg
, reg
, &comp
->reg_list
, list
) {
561 int c
= ppir_ra_reg_class_vec1
+ (reg
->num_components
- 1);
564 ra_set_node_class(g
, n
++, c
);
568 list_for_each_entry(ppir_reg
, reg1
, &comp
->reg_list
, list
) {
570 list_for_each_entry_from(ppir_reg
, reg2
, reg1
->list
.next
,
571 &comp
->reg_list
, list
) {
572 bool interference
= false;
573 if (reg1
->live_in
< reg2
->live_in
) {
574 if (reg1
->live_out
> reg2
->live_in
)
577 else if (reg1
->live_in
> reg2
->live_in
) {
578 if (reg2
->live_out
> reg1
->live_in
)
585 ra_add_node_interference(g
, n1
, n2
);
593 bool ok
= ra_allocate(g
);
594 if (!ok
|| (comp
->force_spilling
-- > 0)) {
595 ppir_reg
*chosen
= ppir_regalloc_choose_spill_node(comp
, g
);
597 /* stack_size will be used to assemble the frame reg in lima_draw.
598 * It is also be used in the spilling code, as negative indices
599 * starting from -1, to create stack addresses. */
600 comp
->prog
->stack_size
++;
601 ppir_regalloc_spill_reg(comp
, chosen
);
602 /* Ask the outer loop to call back in. */
605 ppir_debug("spilled register\n");
609 ppir_error("regalloc fail\n");
614 list_for_each_entry(ppir_reg
, reg
, &comp
->reg_list
, list
) {
615 int reg_index
= ra_get_node_reg(g
, n
++);
616 reg
->index
= get_phy_reg_index(reg_index
);
621 if (lima_debug
& LIMA_DEBUG_PP
)
622 ppir_regalloc_print_result(comp
);
631 bool ppir_regalloc_prog(ppir_compiler
*comp
)
633 bool spilled
= false;
634 comp
->prog
->stack_size
= 0;
636 /* Set from an environment variable to force spilling
637 * for debugging purposes, see lima_screen.c */
638 comp
->force_spilling
= lima_ppir_force_spilling
;
640 ppir_regalloc_update_reglist_ssa(comp
);
642 /* No registers? Probably shader consists of discard instruction */
643 if (list_empty(&comp
->reg_list
))
646 /* this will most likely succeed in the first
647 * try, except for very complicated shaders */
648 while (!ppir_regalloc_prog_try(comp
, &spilled
))