2 * Copyright (c) 2017 Lima Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 #include "util/ralloc.h"
26 #include "util/register_allocate.h"
27 #include "util/u_debug.h"
30 #include "lima_context.h"
32 #define PPIR_FULL_REG_NUM 6
34 #define PPIR_VEC1_REG_NUM (PPIR_FULL_REG_NUM * 4) /* x, y, z, w */
35 #define PPIR_VEC2_REG_NUM (PPIR_FULL_REG_NUM * 3) /* xy, yz, zw */
36 #define PPIR_VEC3_REG_NUM (PPIR_FULL_REG_NUM * 2) /* xyz, yzw */
37 #define PPIR_VEC4_REG_NUM PPIR_FULL_REG_NUM /* xyzw */
38 #define PPIR_HEAD_VEC1_REG_NUM PPIR_FULL_REG_NUM /* x */
39 #define PPIR_HEAD_VEC2_REG_NUM PPIR_FULL_REG_NUM /* xy */
40 #define PPIR_HEAD_VEC3_REG_NUM PPIR_FULL_REG_NUM /* xyz */
41 #define PPIR_HEAD_VEC4_REG_NUM PPIR_FULL_REG_NUM /* xyzw */
43 #define PPIR_VEC1_REG_BASE 0
44 #define PPIR_VEC2_REG_BASE (PPIR_VEC1_REG_BASE + PPIR_VEC1_REG_NUM)
45 #define PPIR_VEC3_REG_BASE (PPIR_VEC2_REG_BASE + PPIR_VEC2_REG_NUM)
46 #define PPIR_VEC4_REG_BASE (PPIR_VEC3_REG_BASE + PPIR_VEC3_REG_NUM)
47 #define PPIR_HEAD_VEC1_REG_BASE (PPIR_VEC4_REG_BASE + PPIR_VEC4_REG_NUM)
48 #define PPIR_HEAD_VEC2_REG_BASE (PPIR_HEAD_VEC1_REG_BASE + PPIR_HEAD_VEC1_REG_NUM)
49 #define PPIR_HEAD_VEC3_REG_BASE (PPIR_HEAD_VEC2_REG_BASE + PPIR_HEAD_VEC2_REG_NUM)
50 #define PPIR_HEAD_VEC4_REG_BASE (PPIR_HEAD_VEC3_REG_BASE + PPIR_HEAD_VEC3_REG_NUM)
51 #define PPIR_REG_COUNT (PPIR_HEAD_VEC4_REG_BASE + PPIR_HEAD_VEC4_REG_NUM)
53 enum ppir_ra_reg_class
{
54 ppir_ra_reg_class_vec1
,
55 ppir_ra_reg_class_vec2
,
56 ppir_ra_reg_class_vec3
,
57 ppir_ra_reg_class_vec4
,
59 /* 4 reg class for load/store instr regs:
60 * load/store instr has no swizzle field, so the (virtual) register
61 * must be allocated at the beginning of a (physical) register,
63 ppir_ra_reg_class_head_vec1
,
64 ppir_ra_reg_class_head_vec2
,
65 ppir_ra_reg_class_head_vec3
,
66 ppir_ra_reg_class_head_vec4
,
68 ppir_ra_reg_class_num
,
71 static const int ppir_ra_reg_base
[ppir_ra_reg_class_num
+ 1] = {
72 [ppir_ra_reg_class_vec1
] = PPIR_VEC1_REG_BASE
,
73 [ppir_ra_reg_class_vec2
] = PPIR_VEC2_REG_BASE
,
74 [ppir_ra_reg_class_vec3
] = PPIR_VEC3_REG_BASE
,
75 [ppir_ra_reg_class_vec4
] = PPIR_VEC4_REG_BASE
,
76 [ppir_ra_reg_class_head_vec1
] = PPIR_HEAD_VEC1_REG_BASE
,
77 [ppir_ra_reg_class_head_vec2
] = PPIR_HEAD_VEC2_REG_BASE
,
78 [ppir_ra_reg_class_head_vec3
] = PPIR_HEAD_VEC3_REG_BASE
,
79 [ppir_ra_reg_class_head_vec4
] = PPIR_HEAD_VEC4_REG_BASE
,
80 [ppir_ra_reg_class_num
] = PPIR_REG_COUNT
,
84 ppir_ra_reg_q_values
[ppir_ra_reg_class_num
] = {
85 (unsigned int []) {1, 2, 3, 4, 1, 2, 3, 4},
86 (unsigned int []) {2, 3, 3, 3, 1, 2, 3, 3},
87 (unsigned int []) {2, 2, 2, 2, 1, 2, 2, 2},
88 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
89 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
90 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
91 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
92 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
95 struct ra_regs
*ppir_regalloc_init(void *mem_ctx
)
97 struct ra_regs
*ret
= ra_alloc_reg_set(mem_ctx
, PPIR_REG_COUNT
, false);
101 /* (x, y, z, w) (xy, yz, zw) (xyz, yzw) (xyzw) (x) (xy) (xyz) (xyzw) */
102 static const int class_reg_num
[ppir_ra_reg_class_num
] = {
103 4, 3, 2, 1, 1, 1, 1, 1,
105 /* base reg (x, y, z, w) confliction with other regs */
106 for (int h
= 0; h
< 4; h
++) {
107 int base_reg_mask
= 1 << h
;
108 for (int i
= 1; i
< ppir_ra_reg_class_num
; i
++) {
109 int class_reg_base_mask
= (1 << ((i
% 4) + 1)) - 1;
110 for (int j
= 0; j
< class_reg_num
[i
]; j
++) {
111 if (base_reg_mask
& (class_reg_base_mask
<< j
)) {
112 for (int k
= 0; k
< PPIR_FULL_REG_NUM
; k
++) {
113 ra_add_reg_conflict(ret
, k
* 4 + h
,
114 ppir_ra_reg_base
[i
] + k
* class_reg_num
[i
] + j
);
120 /* build all other confliction by the base reg confliction */
121 for (int i
= 0; i
< PPIR_VEC1_REG_NUM
; i
++)
122 ra_make_reg_conflicts_transitive(ret
, i
);
124 for (int i
= 0; i
< ppir_ra_reg_class_num
; i
++)
125 ra_alloc_reg_class(ret
);
128 for (int i
= 0; i
< ppir_ra_reg_class_num
; i
++) {
129 while (reg_index
< ppir_ra_reg_base
[i
+ 1])
130 ra_class_add_reg(ret
, i
, reg_index
++);
133 ra_set_finalize(ret
, ppir_ra_reg_q_values
);
137 static ppir_reg
*get_src_reg(ppir_src
*src
)
140 case ppir_target_ssa
:
142 case ppir_target_register
:
149 static void ppir_regalloc_update_reglist_ssa(ppir_compiler
*comp
)
151 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
152 list_for_each_entry(ppir_node
, node
, &block
->node_list
, list
) {
153 if (node
->op
== ppir_op_store_color
)
156 if (!node
->instr
|| node
->op
== ppir_op_const
)
159 ppir_dest
*dest
= ppir_node_get_dest(node
);
161 ppir_reg
*reg
= NULL
;
163 if (dest
->type
== ppir_target_ssa
) {
165 list_addtail(®
->list
, &comp
->reg_list
);
172 static ppir_reg
*ppir_regalloc_build_liveness_info(ppir_compiler
*comp
)
174 ppir_reg
*ret
= NULL
;
176 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
177 list_for_each_entry(ppir_node
, node
, &block
->node_list
, list
) {
178 if (node
->op
== ppir_op_store_color
) {
179 ppir_store_node
*store
= ppir_node_to_store(node
);
180 if (store
->src
.type
== ppir_target_ssa
)
181 ret
= store
->src
.ssa
;
183 ret
= store
->src
.reg
;
184 ret
->live_out
= INT_MAX
;
188 if (!node
->instr
|| node
->op
== ppir_op_const
)
191 /* update reg live_in from node dest (write) */
192 ppir_dest
*dest
= ppir_node_get_dest(node
);
194 ppir_reg
*reg
= NULL
;
196 if (dest
->type
== ppir_target_ssa
) {
199 else if (dest
->type
== ppir_target_register
)
202 if (reg
&& node
->instr
->seq
< reg
->live_in
)
203 reg
->live_in
= node
->instr
->seq
;
206 /* update reg live_out from node src (read) */
207 for (int i
= 0; i
< ppir_node_get_src_num(node
); i
++)
209 ppir_reg
*reg
= get_src_reg(ppir_node_get_src(node
, i
));
210 if (reg
&& node
->instr
->seq
> reg
->live_out
)
211 reg
->live_out
= node
->instr
->seq
;
219 static int get_phy_reg_index(int reg
)
223 for (i
= 0; i
< ppir_ra_reg_class_num
; i
++) {
224 if (reg
< ppir_ra_reg_base
[i
+ 1]) {
225 reg
-= ppir_ra_reg_base
[i
];
230 if (i
< ppir_ra_reg_class_head_vec1
)
231 return reg
/ (4 - i
) * 4 + reg
% (4 - i
);
236 static void ppir_regalloc_print_result(ppir_compiler
*comp
)
238 printf("======ppir regalloc result======\n");
239 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
240 list_for_each_entry(ppir_instr
, instr
, &block
->instr_list
, list
) {
241 printf("%03d:", instr
->index
);
242 for (int i
= 0; i
< PPIR_INSTR_SLOT_NUM
; i
++) {
243 ppir_node
*node
= instr
->slots
[i
];
247 printf(" (%d|", node
->index
);
249 ppir_dest
*dest
= ppir_node_get_dest(node
);
251 printf("%d", ppir_target_get_dest_reg_index(dest
));
255 for (int i
= 0; i
< ppir_node_get_src_num(node
); i
++) {
258 printf("%d", ppir_target_get_src_reg_index(ppir_node_get_src(node
, i
)));
266 printf("--------------------------\n");
269 static bool create_new_instr_after(ppir_block
*block
, ppir_instr
*ref
,
272 ppir_instr
*newinstr
= ppir_instr_create(block
);
273 if (unlikely(!newinstr
))
276 list_del(&newinstr
->list
);
277 list_add(&newinstr
->list
, &ref
->list
);
279 if (!ppir_instr_insert_node(newinstr
, node
))
282 list_for_each_entry_from(ppir_instr
, instr
, ref
, &block
->instr_list
, list
) {
285 newinstr
->seq
= ref
->seq
+1;
286 newinstr
->scheduled
= true;
290 static bool create_new_instr_before(ppir_block
*block
, ppir_instr
*ref
,
293 ppir_instr
*newinstr
= ppir_instr_create(block
);
294 if (unlikely(!newinstr
))
297 list_del(&newinstr
->list
);
298 list_addtail(&newinstr
->list
, &ref
->list
);
300 if (!ppir_instr_insert_node(newinstr
, node
))
303 list_for_each_entry_from(ppir_instr
, instr
, ref
, &block
->instr_list
, list
) {
306 newinstr
->seq
= ref
->seq
-1;
307 newinstr
->scheduled
= true;
311 static ppir_alu_node
* ppir_update_spilled_src(ppir_compiler
*comp
,
313 ppir_node
*node
, ppir_src
*src
,
314 ppir_alu_node
*move_alu
)
316 /* alu nodes may have multiple references to the same value.
317 * try to avoid unnecessary loads for the same alu node by
318 * saving the node resulting from the temporary load */
322 /* alloc new node to load value */
323 ppir_node
*load_node
= ppir_node_create(block
, ppir_op_load_temp
, -1, 0);
326 list_addtail(&load_node
->list
, &node
->list
);
329 ppir_load_node
*load
= ppir_node_to_load(load_node
);
331 load
->index
= -comp
->prog
->stack_size
; /* index sizes are negative */
332 load
->num_components
= 4;
334 ppir_dest
*ld_dest
= &load
->dest
;
335 ld_dest
->type
= ppir_target_pipeline
;
336 ld_dest
->pipeline
= ppir_pipeline_reg_uniform
;
337 ld_dest
->write_mask
= 0xf;
339 create_new_instr_before(block
, node
->instr
, load_node
);
341 /* Create move node */
342 ppir_node
*move_node
= ppir_node_create(block
, ppir_op_mov
, -1 , 0);
343 if (unlikely(!move_node
))
345 list_addtail(&move_node
->list
, &node
->list
);
347 move_alu
= ppir_node_to_alu(move_node
);
349 move_alu
->num_src
= 1;
350 move_alu
->src
->type
= ppir_target_pipeline
;
351 move_alu
->src
->pipeline
= ppir_pipeline_reg_uniform
;
352 for (int i
= 0; i
< 4; i
++)
353 move_alu
->src
->swizzle
[i
] = i
;
355 ppir_dest
*alu_dest
= &move_alu
->dest
;
356 alu_dest
->type
= ppir_target_ssa
;
357 alu_dest
->ssa
.num_components
= 4;
358 alu_dest
->ssa
.live_in
= INT_MAX
;
359 alu_dest
->ssa
.live_out
= 0;
360 alu_dest
->write_mask
= 0xf;
362 list_addtail(&alu_dest
->ssa
.list
, &comp
->reg_list
);
364 if (!ppir_instr_insert_node(load_node
->instr
, move_node
))
367 /* insert the new node as predecessor */
368 ppir_node_foreach_pred_safe(node
, dep
) {
369 ppir_node
*pred
= dep
->pred
;
370 ppir_node_remove_dep(dep
);
371 ppir_node_add_dep(load_node
, pred
);
373 ppir_node_add_dep(node
, move_node
);
374 ppir_node_add_dep(move_node
, load_node
);
377 /* switch node src to use the new ssa instead */
378 src
->type
= ppir_target_ssa
;
379 src
->ssa
= &move_alu
->dest
.ssa
;
384 static ppir_reg
*create_reg(ppir_compiler
*comp
, int num_components
)
386 ppir_reg
*r
= rzalloc(comp
, ppir_reg
);
390 r
->num_components
= num_components
;
391 r
->live_in
= INT_MAX
;
394 list_addtail(&r
->list
, &comp
->reg_list
);
399 static bool ppir_update_spilled_dest(ppir_compiler
*comp
, ppir_block
*block
,
400 ppir_node
*node
, ppir_dest
*dest
)
402 assert(dest
!= NULL
);
403 ppir_reg
*reg
= NULL
;
404 if (dest
->type
== ppir_target_register
) {
406 reg
->num_components
= 4;
410 reg
= create_reg(comp
, 4);
412 list_del(&dest
->ssa
.list
);
415 /* alloc new node to load value */
416 ppir_node
*load_node
= ppir_node_create(block
, ppir_op_load_temp
, -1, 0);
419 list_addtail(&load_node
->list
, &node
->list
);
422 ppir_load_node
*load
= ppir_node_to_load(load_node
);
424 load
->index
= -comp
->prog
->stack_size
; /* index sizes are negative */
425 load
->num_components
= 4;
427 load
->dest
.type
= ppir_target_pipeline
;
428 load
->dest
.pipeline
= ppir_pipeline_reg_uniform
;
429 load
->dest
.write_mask
= 0xf;
431 create_new_instr_before(block
, node
->instr
, load_node
);
433 /* Create move node */
434 ppir_node
*move_node
= ppir_node_create(block
, ppir_op_mov
, -1 , 0);
435 if (unlikely(!move_node
))
437 list_addtail(&move_node
->list
, &node
->list
);
439 ppir_alu_node
*move_alu
= ppir_node_to_alu(move_node
);
441 move_alu
->num_src
= 1;
442 move_alu
->src
->type
= ppir_target_pipeline
;
443 move_alu
->src
->pipeline
= ppir_pipeline_reg_uniform
;
444 for (int i
= 0; i
< 4; i
++)
445 move_alu
->src
->swizzle
[i
] = i
;
447 move_alu
->dest
.type
= ppir_target_register
;
448 move_alu
->dest
.reg
= reg
;
449 move_alu
->dest
.write_mask
= 0x0f;
451 if (!ppir_instr_insert_node(load_node
->instr
, move_node
))
454 ppir_node_foreach_pred_safe(node
, dep
) {
455 ppir_node
*pred
= dep
->pred
;
456 ppir_node_remove_dep(dep
);
457 ppir_node_add_dep(load_node
, pred
);
459 ppir_node_add_dep(node
, move_node
);
460 ppir_node_add_dep(move_node
, load_node
);
462 dest
->type
= ppir_target_register
;
465 /* alloc new node to store value */
466 ppir_node
*store_node
= ppir_node_create(block
, ppir_op_store_temp
, -1, 0);
469 list_addtail(&store_node
->list
, &node
->list
);
472 ppir_store_node
*store
= ppir_node_to_store(store_node
);
474 store
->index
= -comp
->prog
->stack_size
; /* index sizes are negative */
475 store
->num_components
= 4;
477 store
->src
.type
= ppir_target_register
;
478 store
->src
.reg
= dest
->reg
;
480 /* insert the new node as successor */
481 ppir_node_foreach_succ_safe(node
, dep
) {
482 ppir_node
*succ
= dep
->succ
;
483 ppir_node_remove_dep(dep
);
484 ppir_node_add_dep(succ
, store_node
);
486 ppir_node_add_dep(store_node
, node
);
488 create_new_instr_after(block
, node
->instr
, store_node
);
493 static bool ppir_regalloc_spill_reg(ppir_compiler
*comp
, ppir_reg
*chosen
)
495 list_for_each_entry(ppir_block
, block
, &comp
->block_list
, list
) {
496 list_for_each_entry(ppir_node
, node
, &block
->node_list
, list
) {
498 ppir_dest
*dest
= ppir_node_get_dest(node
);
499 ppir_reg
*reg
= NULL
;
501 if (dest
->type
== ppir_target_ssa
)
503 else if (dest
->type
== ppir_target_register
)
507 ppir_update_spilled_dest(comp
, block
, node
, dest
);
510 switch (node
->type
) {
511 case ppir_node_type_alu
:
513 /* alu nodes may have multiple references to the same value.
514 * try to avoid unnecessary loads for the same alu node by
515 * saving the node resulting from the temporary load */
516 ppir_alu_node
*move_alu
= NULL
;
517 ppir_alu_node
*alu
= ppir_node_to_alu(node
);
518 for (int i
= 0; i
< alu
->num_src
; i
++) {
519 reg
= get_src_reg(alu
->src
+ i
);
521 move_alu
= ppir_update_spilled_src(comp
, block
, node
,
522 alu
->src
+ i
, move_alu
);
529 for (int i
= 0; i
< ppir_node_get_src_num(node
); i
++) {
530 ppir_src
*src
= ppir_node_get_src(node
, i
);
531 reg
= get_src_reg(src
);
533 ppir_update_spilled_src(comp
, block
, node
, src
, NULL
);
545 static ppir_reg
*ppir_regalloc_choose_spill_node(ppir_compiler
*comp
,
549 ppir_reg
*chosen
= NULL
;
551 list_for_each_entry(ppir_reg
, reg
, &comp
->reg_list
, list
) {
552 int range
= reg
->live_out
- reg
->live_in
;
554 if (!reg
->spilled
&& reg
->live_out
!= INT_MAX
&& range
> max_range
) {
561 chosen
->spilled
= true;
566 static void ppir_regalloc_reset_liveness_info(ppir_compiler
*comp
)
568 list_for_each_entry(ppir_reg
, reg
, &comp
->reg_list
, list
) {
569 reg
->live_in
= INT_MAX
;
574 int lima_ppir_force_spilling
= 0;
576 static bool ppir_regalloc_prog_try(ppir_compiler
*comp
, bool *spilled
)
580 ppir_regalloc_reset_liveness_info(comp
);
581 end_reg
= ppir_regalloc_build_liveness_info(comp
);
583 struct ra_graph
*g
= ra_alloc_interference_graph(
584 comp
->ra
, list_length(&comp
->reg_list
));
586 int n
= 0, end_reg_index
= 0;
587 list_for_each_entry(ppir_reg
, reg
, &comp
->reg_list
, list
) {
588 int c
= ppir_ra_reg_class_vec1
+ (reg
->num_components
- 1);
593 ra_set_node_class(g
, n
++, c
);
597 list_for_each_entry(ppir_reg
, reg1
, &comp
->reg_list
, list
) {
599 list_for_each_entry_from(ppir_reg
, reg2
, reg1
->list
.next
,
600 &comp
->reg_list
, list
) {
601 bool interference
= false;
602 if (reg1
->live_in
< reg2
->live_in
) {
603 if (reg1
->live_out
> reg2
->live_in
)
606 else if (reg1
->live_in
> reg2
->live_in
) {
607 if (reg2
->live_out
> reg1
->live_in
)
614 ra_add_node_interference(g
, n1
, n2
);
621 ra_set_node_reg(g
, end_reg_index
, ppir_ra_reg_base
[ppir_ra_reg_class_vec4
]);
624 bool ok
= ra_allocate(g
);
625 if (!ok
|| (comp
->force_spilling
-- > 0)) {
626 ppir_reg
*chosen
= ppir_regalloc_choose_spill_node(comp
, g
);
628 /* stack_size will be used to assemble the frame reg in lima_draw.
629 * It is also be used in the spilling code, as negative indices
630 * starting from -1, to create stack addresses. */
631 comp
->prog
->stack_size
++;
632 ppir_regalloc_spill_reg(comp
, chosen
);
633 /* Ask the outer loop to call back in. */
636 ppir_debug("spilled register\n");
640 ppir_error("regalloc fail\n");
645 list_for_each_entry(ppir_reg
, reg
, &comp
->reg_list
, list
) {
646 int reg_index
= ra_get_node_reg(g
, n
++);
647 reg
->index
= get_phy_reg_index(reg_index
);
652 if (lima_debug
& LIMA_DEBUG_PP
)
653 ppir_regalloc_print_result(comp
);
662 bool ppir_regalloc_prog(ppir_compiler
*comp
)
664 bool spilled
= false;
665 comp
->prog
->stack_size
= 0;
667 /* Set from an environment variable to force spilling
668 * for debugging purposes, see lima_screen.c */
669 comp
->force_spilling
= lima_ppir_force_spilling
;
671 ppir_regalloc_update_reglist_ssa(comp
);
673 /* No registers? Probably shader consists of discard instruction */
674 if (list_empty(&comp
->reg_list
))
677 /* this will most likely succeed in the first
678 * try, except for very complicated shaders */
679 while (!ppir_regalloc_prog_try(comp
, &spilled
))