3bd8569cf151f6eca16eb8620234e4520e6641dd
[mesa.git] / src / gallium / drivers / lima / ir / pp / regalloc.c
1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/ralloc.h"
26 #include "util/register_allocate.h"
27 #include "util/u_debug.h"
28
29 #include "ppir.h"
30 #include "lima_context.h"
31
32 #define PPIR_FULL_REG_NUM 6
33
34 #define PPIR_VEC1_REG_NUM (PPIR_FULL_REG_NUM * 4) /* x, y, z, w */
35 #define PPIR_VEC2_REG_NUM (PPIR_FULL_REG_NUM * 3) /* xy, yz, zw */
36 #define PPIR_VEC3_REG_NUM (PPIR_FULL_REG_NUM * 2) /* xyz, yzw */
37 #define PPIR_VEC4_REG_NUM PPIR_FULL_REG_NUM /* xyzw */
38 #define PPIR_HEAD_VEC1_REG_NUM PPIR_FULL_REG_NUM /* x */
39 #define PPIR_HEAD_VEC2_REG_NUM PPIR_FULL_REG_NUM /* xy */
40 #define PPIR_HEAD_VEC3_REG_NUM PPIR_FULL_REG_NUM /* xyz */
41 #define PPIR_HEAD_VEC4_REG_NUM PPIR_FULL_REG_NUM /* xyzw */
42
43 #define PPIR_VEC1_REG_BASE 0
44 #define PPIR_VEC2_REG_BASE (PPIR_VEC1_REG_BASE + PPIR_VEC1_REG_NUM)
45 #define PPIR_VEC3_REG_BASE (PPIR_VEC2_REG_BASE + PPIR_VEC2_REG_NUM)
46 #define PPIR_VEC4_REG_BASE (PPIR_VEC3_REG_BASE + PPIR_VEC3_REG_NUM)
47 #define PPIR_HEAD_VEC1_REG_BASE (PPIR_VEC4_REG_BASE + PPIR_VEC4_REG_NUM)
48 #define PPIR_HEAD_VEC2_REG_BASE (PPIR_HEAD_VEC1_REG_BASE + PPIR_HEAD_VEC1_REG_NUM)
49 #define PPIR_HEAD_VEC3_REG_BASE (PPIR_HEAD_VEC2_REG_BASE + PPIR_HEAD_VEC2_REG_NUM)
50 #define PPIR_HEAD_VEC4_REG_BASE (PPIR_HEAD_VEC3_REG_BASE + PPIR_HEAD_VEC3_REG_NUM)
51 #define PPIR_REG_COUNT (PPIR_HEAD_VEC4_REG_BASE + PPIR_HEAD_VEC4_REG_NUM)
52
53 enum ppir_ra_reg_class {
54 ppir_ra_reg_class_vec1,
55 ppir_ra_reg_class_vec2,
56 ppir_ra_reg_class_vec3,
57 ppir_ra_reg_class_vec4,
58
59 /* 4 reg class for load/store instr regs:
60 * load/store instr has no swizzle field, so the (virtual) register
61 * must be allocated at the beginning of a (physical) register,
62 */
63 ppir_ra_reg_class_head_vec1,
64 ppir_ra_reg_class_head_vec2,
65 ppir_ra_reg_class_head_vec3,
66 ppir_ra_reg_class_head_vec4,
67
68 ppir_ra_reg_class_num,
69 };
70
71 static const int ppir_ra_reg_base[ppir_ra_reg_class_num + 1] = {
72 [ppir_ra_reg_class_vec1] = PPIR_VEC1_REG_BASE,
73 [ppir_ra_reg_class_vec2] = PPIR_VEC2_REG_BASE,
74 [ppir_ra_reg_class_vec3] = PPIR_VEC3_REG_BASE,
75 [ppir_ra_reg_class_vec4] = PPIR_VEC4_REG_BASE,
76 [ppir_ra_reg_class_head_vec1] = PPIR_HEAD_VEC1_REG_BASE,
77 [ppir_ra_reg_class_head_vec2] = PPIR_HEAD_VEC2_REG_BASE,
78 [ppir_ra_reg_class_head_vec3] = PPIR_HEAD_VEC3_REG_BASE,
79 [ppir_ra_reg_class_head_vec4] = PPIR_HEAD_VEC4_REG_BASE,
80 [ppir_ra_reg_class_num] = PPIR_REG_COUNT,
81 };
82
83 static unsigned int *
84 ppir_ra_reg_q_values[ppir_ra_reg_class_num] = {
85 (unsigned int []) {1, 2, 3, 4, 1, 2, 3, 4},
86 (unsigned int []) {2, 3, 3, 3, 1, 2, 3, 3},
87 (unsigned int []) {2, 2, 2, 2, 1, 2, 2, 2},
88 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
89 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
90 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
91 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
92 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
93 };
94
95 struct ra_regs *ppir_regalloc_init(void *mem_ctx)
96 {
97 struct ra_regs *ret = ra_alloc_reg_set(mem_ctx, PPIR_REG_COUNT, false);
98 if (!ret)
99 return NULL;
100
101 /* (x, y, z, w) (xy, yz, zw) (xyz, yzw) (xyzw) (x) (xy) (xyz) (xyzw) */
102 static const int class_reg_num[ppir_ra_reg_class_num] = {
103 4, 3, 2, 1, 1, 1, 1, 1,
104 };
105 /* base reg (x, y, z, w) confliction with other regs */
106 for (int h = 0; h < 4; h++) {
107 int base_reg_mask = 1 << h;
108 for (int i = 1; i < ppir_ra_reg_class_num; i++) {
109 int class_reg_base_mask = (1 << ((i % 4) + 1)) - 1;
110 for (int j = 0; j < class_reg_num[i]; j++) {
111 if (base_reg_mask & (class_reg_base_mask << j)) {
112 for (int k = 0; k < PPIR_FULL_REG_NUM; k++) {
113 ra_add_reg_conflict(ret, k * 4 + h,
114 ppir_ra_reg_base[i] + k * class_reg_num[i] + j);
115 }
116 }
117 }
118 }
119 }
120 /* build all other confliction by the base reg confliction */
121 for (int i = 0; i < PPIR_VEC1_REG_NUM; i++)
122 ra_make_reg_conflicts_transitive(ret, i);
123
124 for (int i = 0; i < ppir_ra_reg_class_num; i++)
125 ra_alloc_reg_class(ret);
126
127 int reg_index = 0;
128 for (int i = 0; i < ppir_ra_reg_class_num; i++) {
129 while (reg_index < ppir_ra_reg_base[i + 1])
130 ra_class_add_reg(ret, i, reg_index++);
131 }
132
133 ra_set_finalize(ret, ppir_ra_reg_q_values);
134 return ret;
135 }
136
137 static ppir_reg *get_src_reg(ppir_src *src)
138 {
139 switch (src->type) {
140 case ppir_target_ssa:
141 return src->ssa;
142 case ppir_target_register:
143 return src->reg;
144 default:
145 return NULL;
146 }
147 }
148
149 static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
150 {
151 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
152 list_for_each_entry(ppir_node, node, &block->node_list, list) {
153 if (node->op == ppir_op_store_color)
154 continue;
155
156 if (!node->instr || node->op == ppir_op_const)
157 continue;
158
159 ppir_dest *dest = ppir_node_get_dest(node);
160 if (dest) {
161 ppir_reg *reg = NULL;
162
163 if (dest->type == ppir_target_ssa) {
164 reg = &dest->ssa;
165 list_addtail(&reg->list, &comp->reg_list);
166 }
167 }
168 }
169 }
170 }
171
172 static ppir_reg *ppir_regalloc_build_liveness_info(ppir_compiler *comp)
173 {
174 ppir_reg *ret = NULL;
175
176 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
177 list_for_each_entry(ppir_node, node, &block->node_list, list) {
178 if (node->op == ppir_op_store_color) {
179 ppir_store_node *store = ppir_node_to_store(node);
180 if (store->src.type == ppir_target_ssa)
181 ret = store->src.ssa;
182 else
183 ret = store->src.reg;
184 ret->live_out = INT_MAX;
185 continue;
186 }
187
188 if (!node->instr || node->op == ppir_op_const)
189 continue;
190
191 /* update reg live_in from node dest (write) */
192 ppir_dest *dest = ppir_node_get_dest(node);
193 if (dest) {
194 ppir_reg *reg = NULL;
195
196 if (dest->type == ppir_target_ssa) {
197 reg = &dest->ssa;
198 }
199 else if (dest->type == ppir_target_register)
200 reg = dest->reg;
201
202 if (reg && node->instr->seq < reg->live_in)
203 reg->live_in = node->instr->seq;
204 }
205
206 /* update reg live_out from node src (read) */
207 switch (node->type) {
208 case ppir_node_type_alu:
209 {
210 ppir_alu_node *alu = ppir_node_to_alu(node);
211 for (int i = 0; i < alu->num_src; i++) {
212 ppir_reg *reg = get_src_reg(alu->src + i);
213 if (reg && node->instr->seq > reg->live_out)
214 reg->live_out = node->instr->seq;
215 }
216 break;
217 }
218 case ppir_node_type_store:
219 {
220 ppir_store_node *store = ppir_node_to_store(node);
221 ppir_reg *reg = get_src_reg(&store->src);
222 if (reg && node->instr->seq > reg->live_out)
223 reg->live_out = node->instr->seq;
224 break;
225 }
226 case ppir_node_type_load:
227 {
228 ppir_load_node *load = ppir_node_to_load(node);
229 ppir_reg *reg = get_src_reg(&load->src);
230 if (reg && node->instr->seq > reg->live_out)
231 reg->live_out = node->instr->seq;
232 break;
233 }
234 case ppir_node_type_load_texture:
235 {
236 ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node);
237 ppir_reg *reg = get_src_reg(&load_tex->src_coords);
238 if (reg && node->instr->seq > reg->live_out)
239 reg->live_out = node->instr->seq;
240 break;
241 }
242 case ppir_node_type_branch:
243 {
244 ppir_branch_node *branch = ppir_node_to_branch(node);
245 for (int i = 0; i < 2; i++) {
246 ppir_reg *reg = get_src_reg(branch->src + i);
247 if (reg && node->instr->seq > reg->live_out)
248 reg->live_out = node->instr->seq;
249 }
250 break;
251 }
252 default:
253 break;
254 }
255 }
256 }
257
258 return ret;
259 }
260
261 static int get_phy_reg_index(int reg)
262 {
263 int i;
264
265 for (i = 0; i < ppir_ra_reg_class_num; i++) {
266 if (reg < ppir_ra_reg_base[i + 1]) {
267 reg -= ppir_ra_reg_base[i];
268 break;
269 }
270 }
271
272 if (i < ppir_ra_reg_class_head_vec1)
273 return reg / (4 - i) * 4 + reg % (4 - i);
274 else
275 return reg * 4;
276 }
277
278 static void ppir_regalloc_print_result(ppir_compiler *comp)
279 {
280 printf("======ppir regalloc result======\n");
281 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
282 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
283 printf("%03d:", instr->index);
284 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
285 ppir_node *node = instr->slots[i];
286 if (!node)
287 continue;
288
289 printf(" (%d|", node->index);
290
291 ppir_dest *dest = ppir_node_get_dest(node);
292 if (dest)
293 printf("%d", ppir_target_get_dest_reg_index(dest));
294
295 printf("|");
296
297 switch (node->type) {
298 case ppir_node_type_alu:
299 {
300 ppir_alu_node *alu = ppir_node_to_alu(node);
301 for (int j = 0; j < alu->num_src; j++) {
302 if (j)
303 printf(" ");
304
305 printf("%d", ppir_target_get_src_reg_index(alu->src + j));
306 }
307 break;
308 }
309 case ppir_node_type_store:
310 {
311 ppir_store_node *store = ppir_node_to_store(node);
312 printf("%d", ppir_target_get_src_reg_index(&store->src));
313 break;
314 }
315 case ppir_node_type_load:
316 {
317 ppir_load_node *load = ppir_node_to_load(node);
318 if (!load->num_components)
319 printf("%d", ppir_target_get_src_reg_index(&load->src));
320 break;
321 }
322 case ppir_node_type_load_texture:
323 {
324 ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node);
325 printf("%d", ppir_target_get_src_reg_index(&load_tex->src_coords));
326 break;
327 }
328 case ppir_node_type_branch:
329 {
330 ppir_branch_node *branch = ppir_node_to_branch(node);
331 for (int j = 0; j < 2; j++) {
332 if (j)
333 printf(" ");
334
335 printf("%d", ppir_target_get_src_reg_index(branch->src + j));
336 }
337 break;
338 }
339 default:
340 break;
341 }
342
343 printf(")");
344 }
345 printf("\n");
346 }
347 }
348 printf("--------------------------\n");
349 }
350
351 static bool create_new_instr_after(ppir_block *block, ppir_instr *ref,
352 ppir_node *node)
353 {
354 ppir_instr *newinstr = ppir_instr_create(block);
355 if (unlikely(!newinstr))
356 return false;
357
358 list_del(&newinstr->list);
359 list_add(&newinstr->list, &ref->list);
360
361 if (!ppir_instr_insert_node(newinstr, node))
362 return false;
363
364 list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
365 instr->seq++;
366 }
367 newinstr->seq = ref->seq+1;
368 newinstr->scheduled = true;
369 return true;
370 }
371
372 static bool create_new_instr_before(ppir_block *block, ppir_instr *ref,
373 ppir_node *node)
374 {
375 ppir_instr *newinstr = ppir_instr_create(block);
376 if (unlikely(!newinstr))
377 return false;
378
379 list_del(&newinstr->list);
380 list_addtail(&newinstr->list, &ref->list);
381
382 if (!ppir_instr_insert_node(newinstr, node))
383 return false;
384
385 list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
386 instr->seq++;
387 }
388 newinstr->seq = ref->seq-1;
389 newinstr->scheduled = true;
390 return true;
391 }
392
393 static ppir_alu_node* ppir_update_spilled_src(ppir_compiler *comp,
394 ppir_block *block,
395 ppir_node *node, ppir_src *src,
396 ppir_alu_node *move_alu)
397 {
398 /* alu nodes may have multiple references to the same value.
399 * try to avoid unnecessary loads for the same alu node by
400 * saving the node resulting from the temporary load */
401 if (move_alu)
402 goto update_src;
403
404 /* alloc new node to load value */
405 ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
406 if (!load_node)
407 return NULL;
408 list_addtail(&load_node->list, &node->list);
409
410 ppir_load_node *load = ppir_node_to_load(load_node);
411
412 load->index = -comp->prog->stack_size; /* index sizes are negative */
413 load->num_components = 4;
414
415 ppir_dest *ld_dest = &load->dest;
416 ld_dest->type = ppir_target_pipeline;
417 ld_dest->pipeline = ppir_pipeline_reg_uniform;
418 ld_dest->write_mask = 0xf;
419
420 create_new_instr_before(block, node->instr, load_node);
421
422 /* Create move node */
423 ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0);
424 if (unlikely(!move_node))
425 return false;
426 list_addtail(&move_node->list, &node->list);
427
428 move_alu = ppir_node_to_alu(move_node);
429
430 move_alu->num_src = 1;
431 move_alu->src->type = ppir_target_pipeline;
432 move_alu->src->pipeline = ppir_pipeline_reg_uniform;
433 for (int i = 0; i < 4; i++)
434 move_alu->src->swizzle[i] = i;
435
436 ppir_dest *alu_dest = &move_alu->dest;
437 alu_dest->type = ppir_target_ssa;
438 alu_dest->ssa.num_components = 4;
439 alu_dest->ssa.live_in = INT_MAX;
440 alu_dest->ssa.live_out = 0;
441 alu_dest->write_mask = 0xf;
442
443 list_addtail(&alu_dest->ssa.list, &comp->reg_list);
444
445 if (!ppir_instr_insert_node(load_node->instr, move_node))
446 return false;
447
448 /* insert the new node as predecessor */
449 ppir_node_foreach_pred_safe(node, dep) {
450 ppir_node *pred = dep->pred;
451 ppir_node_remove_dep(dep);
452 ppir_node_add_dep(load_node, pred);
453 }
454 ppir_node_add_dep(node, move_node);
455 ppir_node_add_dep(move_node, load_node);
456
457 update_src:
458 /* switch node src to use the new ssa instead */
459 src->type = ppir_target_ssa;
460 src->ssa = &move_alu->dest.ssa;
461
462 return move_alu;
463 }
464
465 static ppir_reg *create_reg(ppir_compiler *comp, int num_components)
466 {
467 ppir_reg *r = rzalloc(comp, ppir_reg);
468 if (!r)
469 return NULL;
470
471 r->num_components = num_components;
472 r->live_in = INT_MAX;
473 r->live_out = 0;
474 r->is_head = false;
475 list_addtail(&r->list, &comp->reg_list);
476
477 return r;
478 }
479
480 static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block,
481 ppir_node *node, ppir_dest *dest)
482 {
483 assert(dest != NULL);
484 ppir_reg *reg = NULL;
485 if (dest->type == ppir_target_register) {
486 reg = dest->reg;
487 reg->num_components = 4;
488 reg->spilled = true;
489 }
490 else {
491 reg = create_reg(comp, 4);
492 reg->spilled = true;
493 list_del(&dest->ssa.list);
494 }
495
496 /* alloc new node to load value */
497 ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
498 if (!load_node)
499 return NULL;
500 list_addtail(&load_node->list, &node->list);
501
502 ppir_load_node *load = ppir_node_to_load(load_node);
503
504 load->index = -comp->prog->stack_size; /* index sizes are negative */
505 load->num_components = 4;
506
507 load->dest.type = ppir_target_pipeline;
508 load->dest.pipeline = ppir_pipeline_reg_uniform;
509 load->dest.write_mask = 0xf;
510
511 create_new_instr_before(block, node->instr, load_node);
512
513 /* Create move node */
514 ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0);
515 if (unlikely(!move_node))
516 return false;
517 list_addtail(&move_node->list, &node->list);
518
519 ppir_alu_node *move_alu = ppir_node_to_alu(move_node);
520
521 move_alu->num_src = 1;
522 move_alu->src->type = ppir_target_pipeline;
523 move_alu->src->pipeline = ppir_pipeline_reg_uniform;
524 for (int i = 0; i < 4; i++)
525 move_alu->src->swizzle[i] = i;
526
527 move_alu->dest.type = ppir_target_register;
528 move_alu->dest.reg = reg;
529 move_alu->dest.write_mask = 0x0f;
530
531 if (!ppir_instr_insert_node(load_node->instr, move_node))
532 return false;
533
534 ppir_node_foreach_pred_safe(node, dep) {
535 ppir_node *pred = dep->pred;
536 ppir_node_remove_dep(dep);
537 ppir_node_add_dep(load_node, pred);
538 }
539 ppir_node_add_dep(node, move_node);
540 ppir_node_add_dep(move_node, load_node);
541
542 dest->type = ppir_target_register;
543 dest->reg = reg;
544
545 /* alloc new node to store value */
546 ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0);
547 if (!store_node)
548 return false;
549 list_addtail(&store_node->list, &node->list);
550
551 ppir_store_node *store = ppir_node_to_store(store_node);
552
553 store->index = -comp->prog->stack_size; /* index sizes are negative */
554 store->num_components = 4;
555
556 store->src.type = ppir_target_register;
557 store->src.reg = dest->reg;
558
559 /* insert the new node as successor */
560 ppir_node_foreach_succ_safe(node, dep) {
561 ppir_node *succ = dep->succ;
562 ppir_node_remove_dep(dep);
563 ppir_node_add_dep(succ, store_node);
564 }
565 ppir_node_add_dep(store_node, node);
566
567 create_new_instr_after(block, node->instr, store_node);
568
569 return true;
570 }
571
572 static bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen)
573 {
574 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
575 list_for_each_entry(ppir_node, node, &block->node_list, list) {
576
577 ppir_dest *dest = ppir_node_get_dest(node);
578 ppir_reg *reg = NULL;
579 if (dest) {
580 if (dest->type == ppir_target_ssa)
581 reg = &dest->ssa;
582 else if (dest->type == ppir_target_register)
583 reg = dest->reg;
584
585 if (reg == chosen)
586 ppir_update_spilled_dest(comp, block, node, dest);
587 }
588
589 switch (node->type) {
590 case ppir_node_type_alu:
591 {
592 /* alu nodes may have multiple references to the same value.
593 * try to avoid unnecessary loads for the same alu node by
594 * saving the node resulting from the temporary load */
595 ppir_alu_node *move_alu = NULL;
596 ppir_alu_node *alu = ppir_node_to_alu(node);
597 for (int i = 0; i < alu->num_src; i++) {
598 reg = get_src_reg(alu->src + i);
599 if (reg == chosen) {
600 move_alu = ppir_update_spilled_src(comp, block, node,
601 alu->src + i, move_alu);
602 }
603 }
604 break;
605 }
606 case ppir_node_type_store:
607 {
608 ppir_store_node *store = ppir_node_to_store(node);
609 reg = get_src_reg(&store->src);
610 if (reg == chosen) {
611 ppir_update_spilled_src(comp, block, node, &store->src, NULL);
612 }
613 break;
614 }
615 case ppir_node_type_load:
616 {
617 ppir_load_node *load = ppir_node_to_load(node);
618 reg = get_src_reg(&load->src);
619 if (reg == chosen) {
620 ppir_update_spilled_src(comp, block, node, &load->src, NULL);
621 }
622 break;
623 }
624 case ppir_node_type_load_texture:
625 {
626 ppir_load_texture_node *load_tex = ppir_node_to_load_texture(node);
627 reg = get_src_reg(&load_tex->src_coords);
628 if (reg == chosen) {
629 ppir_update_spilled_src(comp, block, node, &load_tex->src_coords,
630 NULL);
631 }
632 break;
633 }
634 case ppir_node_type_branch:
635 {
636 ppir_branch_node *branch = ppir_node_to_branch(node);
637 for (int i = 0; i < 2; i++) {
638 reg = get_src_reg(branch->src + i);
639 if (reg == chosen) {
640 ppir_update_spilled_src(comp, block, node,
641 branch->src + i, NULL);
642 }
643 }
644 break;
645 }
646 default:
647 break;
648 }
649 }
650 }
651
652 return true;
653 }
654
655 static ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp,
656 struct ra_graph *g)
657 {
658 int max_range = -1;
659 ppir_reg *chosen = NULL;
660
661 list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
662 int range = reg->live_out - reg->live_in;
663
664 if (!reg->spilled && reg->live_out != INT_MAX && range > max_range) {
665 chosen = reg;
666 max_range = range;
667 }
668 }
669
670 if (chosen)
671 chosen->spilled = true;
672
673 return chosen;
674 }
675
676 static void ppir_regalloc_reset_liveness_info(ppir_compiler *comp)
677 {
678 list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
679 reg->live_in = INT_MAX;
680 reg->live_out = 0;
681 }
682 }
683
684 int lima_ppir_force_spilling = 0;
685
686 static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
687 {
688 ppir_reg *end_reg;
689
690 ppir_regalloc_reset_liveness_info(comp);
691 end_reg = ppir_regalloc_build_liveness_info(comp);
692
693 struct ra_graph *g = ra_alloc_interference_graph(
694 comp->ra, list_length(&comp->reg_list));
695
696 int n = 0, end_reg_index = 0;
697 list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
698 int c = ppir_ra_reg_class_vec1 + (reg->num_components - 1);
699 if (reg->is_head)
700 c += 4;
701 if (reg == end_reg)
702 end_reg_index = n;
703 ra_set_node_class(g, n++, c);
704 }
705
706 int n1 = 0;
707 list_for_each_entry(ppir_reg, reg1, &comp->reg_list, list) {
708 int n2 = n1 + 1;
709 list_for_each_entry_from(ppir_reg, reg2, reg1->list.next,
710 &comp->reg_list, list) {
711 bool interference = false;
712 if (reg1->live_in < reg2->live_in) {
713 if (reg1->live_out > reg2->live_in)
714 interference = true;
715 }
716 else if (reg1->live_in > reg2->live_in) {
717 if (reg2->live_out > reg1->live_in)
718 interference = true;
719 }
720 else
721 interference = true;
722
723 if (interference)
724 ra_add_node_interference(g, n1, n2);
725
726 n2++;
727 }
728 n1++;
729 }
730
731 ra_set_node_reg(g, end_reg_index, ppir_ra_reg_base[ppir_ra_reg_class_vec4]);
732
733 *spilled = false;
734 bool ok = ra_allocate(g);
735 if (!ok || (comp->force_spilling-- > 0)) {
736 ppir_reg *chosen = ppir_regalloc_choose_spill_node(comp, g);
737 if (chosen) {
738 /* stack_size will be used to assemble the frame reg in lima_draw.
739 * It is also be used in the spilling code, as negative indices
740 * starting from -1, to create stack addresses. */
741 comp->prog->stack_size++;
742 ppir_regalloc_spill_reg(comp, chosen);
743 /* Ask the outer loop to call back in. */
744 *spilled = true;
745
746 ppir_debug("ppir: spilled register\n");
747 goto err_out;
748 }
749
750 ppir_error("ppir: regalloc fail\n");
751 goto err_out;
752 }
753
754 n = 0;
755 list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
756 int reg_index = ra_get_node_reg(g, n++);
757 reg->index = get_phy_reg_index(reg_index);
758 }
759
760 ralloc_free(g);
761
762 if (lima_debug & LIMA_DEBUG_PP)
763 ppir_regalloc_print_result(comp);
764
765 return true;
766
767 err_out:
768 ralloc_free(g);
769 return false;
770 }
771
772 bool ppir_regalloc_prog(ppir_compiler *comp)
773 {
774 bool spilled = false;
775 comp->prog->stack_size = 0;
776
777 /* Set from an environment variable to force spilling
778 * for debugging purposes, see lima_screen.c */
779 comp->force_spilling = lima_ppir_force_spilling;
780
781 ppir_regalloc_update_reglist_ssa(comp);
782
783 /* No registers? Probably shader consists of discard instruction */
784 if (list_empty(&comp->reg_list))
785 return true;
786
787 /* this will most likely succeed in the first
788 * try, except for very complicated shaders */
789 while (!ppir_regalloc_prog_try(comp, &spilled))
790 if (!spilled)
791 return false;
792
793 return true;
794 }