/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */
#include "util/ralloc.h"
#include "util/register_allocate.h"
#include "util/u_debug.h"

#include "ppir.h"
#include "lima_context.h"
/* Number of physical vec4 temporary registers available to the PP. */
#define PPIR_FULL_REG_NUM 6

/* Virtual register counts per vector size: a value narrower than vec4
 * can live at several component offsets inside one physical register. */
#define PPIR_VEC1_REG_NUM (PPIR_FULL_REG_NUM * 4) /* x, y, z, w */
#define PPIR_VEC2_REG_NUM (PPIR_FULL_REG_NUM * 3) /* xy, yz, zw */
#define PPIR_VEC3_REG_NUM (PPIR_FULL_REG_NUM * 2) /* xyz, yzw */
#define PPIR_VEC4_REG_NUM PPIR_FULL_REG_NUM       /* xyzw */

/* "head" variants must start at component x of a physical register
 * (load/store instructions have no swizzle field), so there is exactly
 * one candidate per physical register. */
#define PPIR_HEAD_VEC1_REG_NUM PPIR_FULL_REG_NUM  /* x */
#define PPIR_HEAD_VEC2_REG_NUM PPIR_FULL_REG_NUM  /* xy */
#define PPIR_HEAD_VEC3_REG_NUM PPIR_FULL_REG_NUM  /* xyz */
#define PPIR_HEAD_VEC4_REG_NUM PPIR_FULL_REG_NUM  /* xyzw */

/* Contiguous virtual register numbering: each class occupies the range
 * [BASE, BASE + NUM) within the flat RA register space. */
#define PPIR_VEC1_REG_BASE 0
#define PPIR_VEC2_REG_BASE (PPIR_VEC1_REG_BASE + PPIR_VEC1_REG_NUM)
#define PPIR_VEC3_REG_BASE (PPIR_VEC2_REG_BASE + PPIR_VEC2_REG_NUM)
#define PPIR_VEC4_REG_BASE (PPIR_VEC3_REG_BASE + PPIR_VEC3_REG_NUM)
#define PPIR_HEAD_VEC1_REG_BASE (PPIR_VEC4_REG_BASE + PPIR_VEC4_REG_NUM)
#define PPIR_HEAD_VEC2_REG_BASE (PPIR_HEAD_VEC1_REG_BASE + PPIR_HEAD_VEC1_REG_NUM)
#define PPIR_HEAD_VEC3_REG_BASE (PPIR_HEAD_VEC2_REG_BASE + PPIR_HEAD_VEC2_REG_NUM)
#define PPIR_HEAD_VEC4_REG_BASE (PPIR_HEAD_VEC3_REG_BASE + PPIR_HEAD_VEC3_REG_NUM)
#define PPIR_REG_COUNT (PPIR_HEAD_VEC4_REG_BASE + PPIR_HEAD_VEC4_REG_NUM)
/* Register classes handed to the register allocator, one per vector
 * width, plus "head" classes for registers that must start at x. */
enum ppir_ra_reg_class {
   ppir_ra_reg_class_vec1,
   ppir_ra_reg_class_vec2,
   ppir_ra_reg_class_vec3,
   ppir_ra_reg_class_vec4,

   /* 4 reg class for load/store instr regs:
    * load/store instr has no swizzle field, so the (virtual) register
    * must be allocated at the beginning of a (physical) register,
    */
   ppir_ra_reg_class_head_vec1,
   ppir_ra_reg_class_head_vec2,
   ppir_ra_reg_class_head_vec3,
   ppir_ra_reg_class_head_vec4,

   ppir_ra_reg_class_num,
};
/* First virtual register number of each class.  The extra
 * [ppir_ra_reg_class_num] entry holds the total register count, so
 * ppir_ra_reg_base[i + 1] is always a valid end bound for class i. */
static const int ppir_ra_reg_base[ppir_ra_reg_class_num + 1] = {
   [ppir_ra_reg_class_vec1]      = PPIR_VEC1_REG_BASE,
   [ppir_ra_reg_class_vec2]      = PPIR_VEC2_REG_BASE,
   [ppir_ra_reg_class_vec3]      = PPIR_VEC3_REG_BASE,
   [ppir_ra_reg_class_vec4]      = PPIR_VEC4_REG_BASE,
   [ppir_ra_reg_class_head_vec1] = PPIR_HEAD_VEC1_REG_BASE,
   [ppir_ra_reg_class_head_vec2] = PPIR_HEAD_VEC2_REG_BASE,
   [ppir_ra_reg_class_head_vec3] = PPIR_HEAD_VEC3_REG_BASE,
   [ppir_ra_reg_class_head_vec4] = PPIR_HEAD_VEC4_REG_BASE,
   [ppir_ra_reg_class_num]       = PPIR_REG_COUNT,
};
/* q values for the register allocator (see util/register_allocate.c):
 * q[B][C] bounds how many class-B registers a single class-C register
 * can conflict with.
 * NOTE(review): the declarator line was lost in extraction and was
 * reconstructed as `static const unsigned int *` — confirm upstream. */
static const unsigned int *
ppir_ra_reg_q_values[ppir_ra_reg_class_num] = {
   (unsigned int []) {1, 2, 3, 4, 1, 2, 3, 4},
   (unsigned int []) {2, 3, 3, 3, 1, 2, 3, 3},
   (unsigned int []) {2, 2, 2, 2, 1, 2, 2, 2},
   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
   (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
};
/* Build the shared RA register set: PPIR_REG_COUNT virtual registers
 * partitioned into the classes above, with conflicts between every
 * virtual register and the vec1 component registers it overlaps.
 * Returns NULL on allocation failure.  Owned by mem_ctx (ralloc). */
struct ra_regs *ppir_regalloc_init(void *mem_ctx)
{
   struct ra_regs *ret = ra_alloc_reg_set(mem_ctx, PPIR_REG_COUNT, false);
   if (!ret)
      return NULL;

   /* (x, y, z, w) (xy, yz, zw) (xyz, yzw) (xyzw) (x) (xy) (xyz) (xyzw) */
   static const int class_reg_num[ppir_ra_reg_class_num] = {
      4, 3, 2, 1, 1, 1, 1, 1,
   };

   /* base reg (x, y, z, w) confliction with other regs */
   for (int h = 0; h < 4; h++) {
      int base_reg_mask = 1 << h;
      for (int i = 1; i < ppir_ra_reg_class_num; i++) {
         /* component footprint of a class-i register: i%4+1 bits wide */
         int class_reg_base_mask = (1 << ((i % 4) + 1)) - 1;
         for (int j = 0; j < class_reg_num[i]; j++) {
            if (base_reg_mask & (class_reg_base_mask << j)) {
               for (int k = 0; k < PPIR_FULL_REG_NUM; k++) {
                  ra_add_reg_conflict(ret, k * 4 + h,
                     ppir_ra_reg_base[i] + k * class_reg_num[i] + j);
               }
            }
         }
      }
   }

   /* build all other confliction by the base reg confliction */
   for (int i = 0; i < PPIR_VEC1_REG_NUM; i++)
      ra_make_reg_conflicts_transitive(ret, i);

   for (int i = 0; i < ppir_ra_reg_class_num; i++)
      ra_alloc_reg_class(ret);

   /* assign each class its contiguous register range */
   int reg_index = 0;
   for (int i = 0; i < ppir_ra_reg_class_num; i++) {
      while (reg_index < ppir_ra_reg_base[i + 1])
         ra_class_add_reg(ret, i, reg_index++);
   }

   ra_set_finalize(ret, ppir_ra_reg_q_values);
   return ret;
}
/* SSA values live in their defining node's dest; collect them into
 * comp->reg_list so the allocator sees them alongside array regs. */
static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_node, node, &block->node_list, list) {
         if (node->op == ppir_op_store_color)
            continue;

         /* unscheduled nodes and consts have no allocatable dest */
         if (!node->instr || node->op == ppir_op_const)
            continue;

         ppir_dest *dest = ppir_node_get_dest(node);
         if (dest) {
            ppir_reg *reg = NULL;

            if (dest->type == ppir_target_ssa) {
               reg = &dest->ssa;
               list_addtail(&reg->list, &comp->reg_list);
            }
         }
      }
   }
}
/* Map a virtual RA register number back to a physical location encoded
 * as (physical reg * 4 + start component). */
static int get_phy_reg_index(int reg)
{
   int i;

   /* find which class 'reg' falls in and make it class-relative */
   for (i = 0; i < ppir_ra_reg_class_num; i++) {
      if (reg < ppir_ra_reg_base[i + 1]) {
         reg -= ppir_ra_reg_base[i];
         break;
      }
   }

   if (i < ppir_ra_reg_class_head_vec1)
      /* vecN class i has (4 - i) start offsets per physical register */
      return reg / (4 - i) * 4 + reg % (4 - i);
   else
      /* head classes: one candidate per physical reg, always at x */
      return reg * 4;
}
/* Debug dump of the allocation result: one line per instruction with
 * "(node_index|dest_reg|src_regs...)" for each occupied slot. */
static void ppir_regalloc_print_result(ppir_compiler *comp)
{
   printf("======ppir regalloc result======\n");
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         printf("%03d:", instr->index);
         for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
            ppir_node *node = instr->slots[i];
            if (!node)
               continue;

            printf(" (%d|", node->index);

            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest)
               printf("%d", ppir_target_get_dest_reg_index(dest));

            printf("|");

            /* note: inner 'i' intentionally shadows the slot index */
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               if (i)
                  printf(" ");

               printf("%d", ppir_target_get_src_reg_index(ppir_node_get_src(node, i)));
            }

            printf(")");
         }
         printf("\n");
      }
   }
   printf("--------------------------\n");
}
/* Create a fresh instruction holding only 'node' and place it right
 * after 'ref' in the block; bumps the seq of all following instrs so
 * scheduling order stays consistent.  Returns false on failure. */
static bool create_new_instr_after(ppir_block *block, ppir_instr *ref,
                                   ppir_node *node)
{
   ppir_instr *newinstr = ppir_instr_create(block);
   if (unlikely(!newinstr))
      return false;

   /* ppir_instr_create appends at block tail; move it after 'ref' */
   list_del(&newinstr->list);
   list_add(&newinstr->list, &ref->list);

   if (!ppir_instr_insert_node(newinstr, node))
      return false;

   list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
      instr->seq++;
   }
   newinstr->seq = ref->seq+1;
   newinstr->scheduled = true;
   return true;
}
/* Create a fresh instruction holding only 'node' and place it right
 * before 'ref' in the block; bumps the seq of 'ref' and all following
 * instrs.  Returns false on failure. */
static bool create_new_instr_before(ppir_block *block, ppir_instr *ref,
                                    ppir_node *node)
{
   ppir_instr *newinstr = ppir_instr_create(block);
   if (unlikely(!newinstr))
      return false;

   /* ppir_instr_create appends at block tail; move it before 'ref' */
   list_del(&newinstr->list);
   list_addtail(&newinstr->list, &ref->list);

   if (!ppir_instr_insert_node(newinstr, node))
      return false;

   list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
      instr->seq++;
   }
   newinstr->seq = ref->seq-1;
   newinstr->scheduled = true;
   return true;
}
/* Rewrite one source of 'node' that reads the spilled register: load
 * the value back from temporary memory and retarget the source at the
 * loaded value.  *fill_node caches the reload so multiple sources of
 * the same node reuse a single load.  Returns false on failure. */
static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block,
                                    ppir_node *node, ppir_src *src,
                                    ppir_node **fill_node)
{
   /* nodes might have multiple references to the same value.
    * avoid creating unnecessary loads for the same fill by
    * saving the node resulting from the temporary load */
   if (*fill_node)
      goto update_src;

   int num_components = src->reg->num_components;

   /* alloc new node to load value */
   ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
   if (unlikely(!load_node))
      return false;
   list_addtail(&load_node->list, &node->list);

   ppir_load_node *load = ppir_node_to_load(load_node);

   load->index = -comp->prog->stack_size; /* index sizes are negative */
   load->num_components = num_components;

   /* load lands in the uniform pipeline register, not a temp reg */
   ppir_dest *ld_dest = &load->dest;
   ld_dest->type = ppir_target_pipeline;
   ld_dest->pipeline = ppir_pipeline_reg_uniform;
   ld_dest->write_mask = u_bit_consecutive(0, num_components);

   /* If the uniform slot is empty, we can insert the load_temp
    * there and use it directly. Exceptionally, if the node is in the
    * varying or texld slot, this doesn't work. */
   if (!node->instr->slots[PPIR_INSTR_SLOT_UNIFORM] &&
       node->instr_pos != PPIR_INSTR_SLOT_VARYING &&
       node->instr_pos != PPIR_INSTR_SLOT_TEXLD) {
      ppir_node_target_assign(src, load_node);
      *fill_node = load_node;
      return ppir_instr_insert_node(node->instr, load_node);
   }

   /* Uniform slot was taken, so fall back to a new instruction with a mov */
   if (!create_new_instr_before(block, node->instr, load_node))
      return false;

   /* Create move node */
   ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1, 0);
   if (unlikely(!move_node))
      return false;
   list_addtail(&move_node->list, &node->list);

   ppir_alu_node *move_alu = ppir_node_to_alu(move_node);

   move_alu->num_src = 1;
   move_alu->src->type = ppir_target_pipeline;
   move_alu->src->pipeline = ppir_pipeline_reg_uniform;
   for (int i = 0; i < 4; i++)
      move_alu->src->swizzle[i] = i;

   ppir_dest *alu_dest = &move_alu->dest;
   alu_dest->type = ppir_target_ssa;
   alu_dest->ssa.num_components = num_components;
   alu_dest->ssa.spilled = true; /* don't consider it for spilling again */
   alu_dest->write_mask = u_bit_consecutive(0, num_components);

   /* register the new SSA value so later RA rounds allocate it */
   list_addtail(&alu_dest->ssa.list, &comp->reg_list);

   if (!ppir_instr_insert_node(load_node->instr, move_node))
      return false;

   /* insert the new node as predecessor */
   ppir_node_foreach_pred_safe(node, dep) {
      ppir_node *pred = dep->pred;
      ppir_node_remove_dep(dep);
      ppir_node_add_dep(load_node, pred, ppir_dep_src);
   }
   ppir_node_add_dep(node, move_node, ppir_dep_src);
   ppir_node_add_dep(move_node, load_node, ppir_dep_src);

   *fill_node = move_node;

update_src:
   /* switch node src to use the fill node dest */
   ppir_node_target_assign(src, *fill_node);

   return true;
}
/* Before a partial write to a spilled register dest, reload the current
 * value from temporary memory (load_temp -> uniform pipeline -> mov to
 * the register) so unwritten components keep their value.  Returns
 * false on failure. */
static bool ppir_update_spilled_dest_load(ppir_compiler *comp, ppir_block *block,
                                          ppir_node *node)
{
   ppir_dest *dest = ppir_node_get_dest(node);
   assert(dest != NULL);
   assert(dest->type == ppir_target_register);
   ppir_reg *reg = dest->reg;
   int num_components = reg->num_components;

   /* alloc new node to load value */
   ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
   if (unlikely(!load_node))
      return false;
   list_addtail(&load_node->list, &node->list);

   ppir_load_node *load = ppir_node_to_load(load_node);

   load->index = -comp->prog->stack_size; /* index sizes are negative */
   load->num_components = num_components;

   load->dest.type = ppir_target_pipeline;
   load->dest.pipeline = ppir_pipeline_reg_uniform;
   load->dest.write_mask = u_bit_consecutive(0, num_components);

   /* New instruction is needed since we're updating a dest register
    * and we can't write to the uniform pipeline reg */
   if (!create_new_instr_before(block, node->instr, load_node))
      return false;

   /* Create move node */
   ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1, 0);
   if (unlikely(!move_node))
      return false;
   list_addtail(&move_node->list, &node->list);

   ppir_alu_node *move_alu = ppir_node_to_alu(move_node);

   move_alu->num_src = 1;
   move_alu->src->type = ppir_target_pipeline;
   move_alu->src->pipeline = ppir_pipeline_reg_uniform;
   for (int i = 0; i < 4; i++)
      move_alu->src->swizzle[i] = i;

   /* mov writes the reloaded value back into the spilled register */
   move_alu->dest.type = ppir_target_register;
   move_alu->dest.reg = reg;
   move_alu->dest.write_mask = u_bit_consecutive(0, num_components);

   if (!ppir_instr_insert_node(load_node->instr, move_node))
      return false;

   /* reroute node's predecessors through the reload chain */
   ppir_node_foreach_pred_safe(node, dep) {
      ppir_node *pred = dep->pred;
      ppir_node_remove_dep(dep);
      ppir_node_add_dep(load_node, pred, ppir_dep_src);
   }
   ppir_node_add_dep(node, move_node, ppir_dep_src);
   ppir_node_add_dep(move_node, load_node, ppir_dep_src);

   return true;
}
/* After a write to the spilled register, store the value out to
 * temporary memory, reusing the instruction's store_temp slot when
 * possible or appending a new instruction otherwise.  Returns false on
 * failure. */
static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block,
                                     ppir_node *node)
{
   ppir_dest *dest = ppir_node_get_dest(node);
   assert(dest != NULL);
   ppir_reg *reg = ppir_dest_get_reg(dest);

   /* alloc new node to store value */
   ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0);
   if (unlikely(!store_node))
      return false;
   list_addtail(&store_node->list, &node->list);

   ppir_store_node *store = ppir_node_to_store(store_node);

   store->index = -comp->prog->stack_size; /* index sizes are negative */

   ppir_node_target_assign(&store->src, node);
   store->num_components = reg->num_components;

   /* insert the new node as successor */
   ppir_node_foreach_succ_safe(node, dep) {
      ppir_node *succ = dep->succ;
      ppir_node_remove_dep(dep);
      ppir_node_add_dep(succ, store_node, ppir_dep_src);
   }
   ppir_node_add_dep(store_node, node, ppir_dep_src);

   /* If the store temp slot is empty, we can insert the store_temp
    * there and use it directly. Exceptionally, if the node is in the
    * combine slot, this doesn't work. */
   if (!node->instr->slots[PPIR_INSTR_SLOT_STORE_TEMP] &&
       node->instr_pos != PPIR_INSTR_SLOT_ALU_COMBINE)
      return ppir_instr_insert_node(node->instr, store_node);

   /* Not possible to merge store, so fall back to a new instruction */
   return create_new_instr_after(block, node->instr, store_node);
}
/* Rewrite every def and use of 'chosen' to go through temporary
 * memory: a store after each def, a load before each use.  Returns
 * false if any rewrite fails. */
static bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_node, node, &block->node_list, list) {

         ppir_dest *dest = ppir_node_get_dest(node);
         if (dest && ppir_dest_get_reg(dest) == chosen) {
            /* If dest is a register, it might be updating only some its
             * components, so need to load the existing value first */
            if (dest->type == ppir_target_register) {
               if (!ppir_update_spilled_dest_load(comp, block, node))
                  return false;
            }
            if (!ppir_update_spilled_dest(comp, block, node))
               return false;
         }

         ppir_node *fill_node = NULL;
         /* nodes might have multiple references to the same value.
          * avoid creating unnecessary loads for the same fill by
          * saving the node resulting from the temporary load */
         for (int i = 0; i < ppir_node_get_src_num(node); i++) {
            ppir_src *src = ppir_node_get_src(node, i);
            ppir_reg *reg = ppir_src_get_reg(src);
            if (reg == chosen) {
               if (!ppir_update_spilled_src(comp, block, node, src, &fill_node))
                  return false;
            }
         }
      }
   }

   return true;
}
/* Assign a spill cost to every register and ask the RA for the best
 * spill candidate.  Cheaper to spill: wide registers; more expensive:
 * narrow registers and registers touched by instructions whose
 * uniform/store_temp slot is already occupied (spilling those forces
 * extra instructions).  Returns NULL if nothing can be spilled. */
static ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp,
                                                 struct ra_graph *g)
{
   float spill_costs[list_length(&comp->reg_list)];
   /* experimentally determined, it seems to be worth scaling cost of
    * regs in instructions that have used uniform/store_temp slots,
    * but not too much as to offset the num_components base cost. */
   const float slot_scale = 1.1f;

   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      /* NOTE(review): guard reconstructed — already-spilled regs get
       * zero cost; confirm the exact condition against upstream */
      if (reg->spilled) {
         /* not considered for spilling */
         spill_costs[reg->regalloc_index] = 0.0f;
         continue;
      }

      /* It is beneficial to spill registers with higher component number,
       * so increase the cost of spilling registers with few components */
      float spill_cost = 4.0f / (float)reg->num_components;
      spill_costs[reg->regalloc_index] = spill_cost;
   }

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         if (instr->slots[PPIR_INSTR_SLOT_UNIFORM]) {
            /* uniform slot busy: reloading any reg sourced here would
             * need an extra mov instruction */
            for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
               ppir_node *node = instr->slots[i];
               if (!node)
                  continue;

               for (int j = 0; j < ppir_node_get_src_num(node); j++) {
                  ppir_src *src = ppir_node_get_src(node, j);
                  if (!src)
                     continue;

                  ppir_reg *reg = ppir_src_get_reg(src);
                  if (!reg)
                     continue;

                  spill_costs[reg->regalloc_index] *= slot_scale;
               }
            }
         }
         if (instr->slots[PPIR_INSTR_SLOT_STORE_TEMP]) {
            /* store_temp slot busy: spilling any reg written here would
             * need an extra instruction */
            for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
               ppir_node *node = instr->slots[i];
               if (!node)
                  continue;

               ppir_dest *dest = ppir_node_get_dest(node);
               if (!dest)
                  continue;

               ppir_reg *reg = ppir_dest_get_reg(dest);
               if (!reg)
                  continue;

               spill_costs[reg->regalloc_index] *= slot_scale;
            }
         }
      }
   }

   for (int i = 0; i < list_length(&comp->reg_list); i++)
      ra_set_node_spill_cost(g, i, spill_costs[i]);

   int r = ra_get_best_spill_node(g);
   if (r == -1)
      return NULL;

   ppir_reg *chosen = NULL;
   int i = 0;
   /* NOTE(review): lookup reconstructed — RA node index equals the
    * reg's position in reg_list (same order as regalloc_index) */
   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      if (i++ == r) {
         chosen = reg;
         break;
      }
   }
   assert(chosen);

   chosen->spilled = true;
   chosen->is_head = true; /* store_temp unable to do swizzle */

   return chosen;
}
/* Renumber regalloc indices and (re)allocate all per-block and
 * per-instruction liveness storage sized by the current reg_list
 * length.  Called before every allocation attempt because spilling
 * grows the register list. */
static void ppir_regalloc_reset_liveness_info(ppir_compiler *comp)
{
   int idx = 0;

   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      reg->regalloc_index = idx++;
   }

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      /* ralloc_free(NULL) is a no-op, so no guard is needed here */
      ralloc_free(block->live_in);
      block->live_in = rzalloc_array(comp,
            struct ppir_liveness, list_length(&comp->reg_list));

      if (block->live_in_set)
         _mesa_set_destroy(block->live_in_set, NULL);
      /* NOTE(review): hash callback line was lost in extraction;
       * _mesa_hash_pointer is the standard pairing with
       * _mesa_key_pointer_equal — confirm upstream */
      block->live_in_set = _mesa_set_create(comp,
                                            _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);

      ralloc_free(block->live_out);
      block->live_out = rzalloc_array(comp,
            struct ppir_liveness, list_length(&comp->reg_list));

      if (block->live_out_set)
         _mesa_set_destroy(block->live_out_set, NULL);
      block->live_out_set = _mesa_set_create(comp,
                                             _mesa_hash_pointer,
                                             _mesa_key_pointer_equal);

      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         ralloc_free(instr->live_in);
         instr->live_in = rzalloc_array(comp,
               struct ppir_liveness, list_length(&comp->reg_list));

         if (instr->live_in_set)
            _mesa_set_destroy(instr->live_in_set, NULL);
         instr->live_in_set = _mesa_set_create(comp,
                                               _mesa_hash_pointer,
                                               _mesa_key_pointer_equal);

         if (instr->live_internal)
            ralloc_free(instr->live_internal);
         instr->live_internal = rzalloc_array(comp,
               struct ppir_liveness, list_length(&comp->reg_list));

         if (instr->live_internal_set)
            _mesa_set_destroy(instr->live_internal_set, NULL);
         instr->live_internal_set = _mesa_set_create(comp,
                                                     _mesa_hash_pointer,
                                                     _mesa_key_pointer_equal);

         ralloc_free(instr->live_out);
         instr->live_out = rzalloc_array(comp,
               struct ppir_liveness, list_length(&comp->reg_list));

         if (instr->live_out_set)
            _mesa_set_destroy(instr->live_out_set, NULL);
         instr->live_out_set = _mesa_set_create(comp,
                                                _mesa_hash_pointer,
                                                _mesa_key_pointer_equal);
      }
   }
}
/* Add pairwise interference between every register in 'liveness'.
 * The set is consumed: each entry is removed once the outer loop has
 * paired it with all remaining entries (each pair is added once). */
static void ppir_all_interference(ppir_compiler *comp, struct ra_graph *g,
                                  struct set *liveness)
{
   set_foreach(liveness, entry1) {
      set_foreach(liveness, entry2) {
         const struct ppir_liveness *r1 = entry1->key;
         const struct ppir_liveness *r2 = entry2->key;
         ra_add_node_interference(g, r1->reg->regalloc_index,
                                  r2->reg->regalloc_index);
      }
      _mesa_set_remove(liveness, entry1);
   }
}
/* Number of forced spill rounds; set from an environment variable in
 * lima_screen.c for debugging the spilling code. */
int lima_ppir_force_spilling = 0;
/* Run one register-allocation attempt.  Returns true on success; on
 * failure *spilled reports whether a register was spilled (caller
 * should retry) or allocation failed for good. */
static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
{
   ppir_regalloc_reset_liveness_info(comp);

   struct ra_graph *g = ra_alloc_interference_graph(
      comp->ra, list_length(&comp->reg_list));

   int n = 0;
   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      int c = ppir_ra_reg_class_vec1 + (reg->num_components - 1);
      /* NOTE(review): reconstructed — head classes follow the four
       * plain vec classes in the enum, hence the +4; confirm upstream */
      if (reg->is_head)
         c += 4;
      ra_set_node_class(g, n++, c);
   }

   ppir_liveness_analysis(comp);

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         /* values live only inside the instr still conflict with
          * everything live across it */
         set_foreach(instr->live_internal_set, entry) {
            _mesa_set_add(instr->live_in_set, entry->key);
            _mesa_set_add(instr->live_out_set, entry->key);
         }
         ppir_all_interference(comp, g, instr->live_in_set);
         ppir_all_interference(comp, g, instr->live_out_set);
      }
   }

   *spilled = false;
   bool ok = ra_allocate(g);
   if (!ok || (comp->force_spilling-- > 0)) {
      ppir_reg *chosen = ppir_regalloc_choose_spill_node(comp, g);
      if (chosen) {
         /* stack_size will be used to assemble the frame reg in lima_draw.
          * It is also be used in the spilling code, as negative indices
          * starting from -1, to create stack addresses. */
         comp->prog->stack_size++;
         if (!ppir_regalloc_spill_reg(comp, chosen))
            return false;
         /* Ask the outer loop to call back in. */
         *spilled = true;

         ppir_debug("spilled register %d/%d, num_components: %d\n",
                    chosen->regalloc_index, list_length(&comp->reg_list),
                    chosen->num_components);
         return false;
      }

      ppir_error("regalloc fail\n");
      return false;
   }

   /* success: translate RA results back to physical reg indices */
   n = 0;
   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      int reg_index = ra_get_node_reg(g, n++);
      reg->index = get_phy_reg_index(reg_index);
   }

   /* NOTE(review): freeing of g on the success path reconstructed from
    * the dropped region — confirm upstream; the failure paths rely on
    * the ralloc context for cleanup */
   ralloc_free(g);

   if (lima_debug & LIMA_DEBUG_PP)
      ppir_regalloc_print_result(comp);

   return true;
}
713 bool ppir_regalloc_prog(ppir_compiler
*comp
)
715 bool spilled
= false;
716 comp
->prog
->stack_size
= 0;
718 /* Set from an environment variable to force spilling
719 * for debugging purposes, see lima_screen.c */
720 comp
->force_spilling
= lima_ppir_force_spilling
;
722 ppir_regalloc_update_reglist_ssa(comp
);
724 /* No registers? Probably shader consists of discard instruction */
725 if (list_is_empty(&comp
->reg_list
))
728 /* this will most likely succeed in the first
729 * try, except for very complicated shaders */
730 while (!ppir_regalloc_prog_try(comp
, &spilled
))