nv50: yet another case we need a nop.exit
[mesa.git] src/gallium/drivers/nv50/nv50_pc_regalloc.c
/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "nv50_context.h"
#include "nv50_pc.h"

#include "util/u_simple_list.h"

#define NUM_REGISTER_FILES 4

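/* Occupancy bitmask per register file: one bit per allocatable register
 * slot, 8 x 32 bits = up to 256 slots; last[f] is the highest usable
 * index. Values spanning more than one slot occupy a contiguous group of
 * bits (see reg_occupy / reg_assign).
 */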
struct register_set {
   struct nv_pc *pc;

   uint32_t last[NUM_REGISTER_FILES];
   uint32_t bits[NUM_REGISTER_FILES][8];
};

struct nv_pc_pass {
   struct nv_pc *pc;

   struct nv_instruction **insns;
   int num_insns;

   uint pass_seq;
};

static void
ranges_coalesce(struct nv_range *range)
{
   while (range->next && range->end >= range->next->bgn) {
      struct nv_range *rnn = range->next->next;
      assert(range->bgn <= range->next->bgn);
      range->end = MAX2(range->end, range->next->end);
      FREE(range->next);
      range->next = rnn;
   }
}

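/* Insert [bgn, end) into the value's sorted, non-overlapping list of live
 * ranges, merging with existing ranges where they touch or overlap.
 * Returns TRUE if an existing range absorbed the interval (a caller that
 * passed in new_range still owns it and must free it), FALSE if new_range
 * (or a freshly allocated one) was linked into the list.
 */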
static boolean
add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
{
   struct nv_range *range, **nextp = &val->livei;

   for (range = val->livei; range; range = range->next) {
      if (end < range->bgn)
         break; /* insert before */

      if (bgn > range->end) {
         nextp = &range->next;
         continue; /* insert after */
      }

      /* overlap */
      if (bgn < range->bgn) {
         range->bgn = bgn;
         if (end > range->end)
            range->end = end;
         ranges_coalesce(range);
         return TRUE;
      }
      if (end > range->end) {
         range->end = end;
         ranges_coalesce(range);
         return TRUE;
      }
      assert(bgn >= range->bgn);
      assert(end <= range->end);
      return TRUE;
   }

   if (!new_range)
      new_range = CALLOC_STRUCT(nv_range);

   new_range->bgn = bgn;
   new_range->end = end;
   new_range->next = range;
   *(nextp) = new_range;
   return FALSE;
}

static void
add_range(struct nv_value *val, struct nv_basic_block *b, int end)
{
   int bgn;

   if (!val->insn) /* ignore non-def values */
      return;
   assert(b->entry->serial <= b->exit->serial);
   assert(b->phi->serial <= end);
   assert(b->exit->serial + 1 >= end);

   bgn = val->insn->serial;
   if (bgn < b->entry->serial || bgn > b->exit->serial)
      bgn = b->entry->serial;

   if (bgn > end) {
      debug_printf("Aieee! BLOCK [%i, %i], RANGE [%i, %i)\n",
                   b->entry->serial, b->exit->serial, bgn, end);
   }
   assert(bgn <= end);

   if (bgn < val->insn->serial)
      debug_printf("WARNING: leaking value %i ?\n", val->n);

   add_range_ex(val, bgn, end, NULL);
}

#ifdef NV50_RA_DEBUG_JOIN
static void
livei_print(struct nv_value *a)
{
   struct nv_range *r = a->livei;

   debug_printf("livei %i: ", a->n);
   while (r) {
      debug_printf("[%i, %i) ", r->bgn, r->end);
      r = r->next;
   }
   debug_printf("\n");
}
#endif

static void
livei_unify(struct nv_value *dst, struct nv_value *src)
{
   struct nv_range *range, *next;

   for (range = src->livei; range; range = next) {
      next = range->next;
      if (add_range_ex(dst, range->bgn, range->end, range))
         FREE(range);
   }
   src->livei = NULL;
}

static void
livei_release(struct nv_value *val)
{
   struct nv_range *range, *next;

   for (range = val->livei; range; range = next) {
      next = range->next;
      FREE(range);
   }
}

static boolean
livei_have_overlap(struct nv_value *a, struct nv_value *b)
{
   struct nv_range *r_a, *r_b;

   for (r_a = a->livei; r_a; r_a = r_a->next) {
      for (r_b = b->livei; r_b; r_b = r_b->next) {
         if (r_b->bgn < r_a->end &&
             r_b->end > r_a->bgn)
            return TRUE;
      }
   }
   return FALSE;
}

static int
livei_end(struct nv_value *a)
{
   struct nv_range *r = a->livei;

   assert(r);
   while (r->next)
      r = r->next;
   return r->end;
}

static boolean
livei_contains(struct nv_value *a, int pos)
{
   struct nv_range *r;

   for (r = a->livei; r && r->bgn <= pos; r = r->next)
      if (r->end > pos)
         return TRUE;
   return FALSE;
}

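/* Try to assign n consecutive register slots to the n defs of an
 * instruction (n > 1 only for vector ops such as TEX). A rough sketch of
 * the arithmetic, assuming nv_type_order() - 1 gives the log2 slot count
 * of a single value: s is the total number of slots needed, m the
 * contiguous bitmask of those slots, and the loop searches set->bits[f]
 * for an s-aligned free group. On success the defs receive consecutive
 * reg.id values.
 */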
static boolean
reg_assign(struct register_set *set, struct nv_value **def, int n)
{
   int i, id, s;
   uint m;
   int f = def[0]->reg.file;

   s = n << (nv_type_order(def[0]->reg.type) - 1);
   m = (1 << s) - 1;

   id = set->last[f];

   for (i = 0; i * 32 < set->last[f]; ++i) {
      if (set->bits[f][i] == 0xffffffff)
         continue;

      for (id = 0; id < 32; id += s)
         if (!(set->bits[f][i] & (m << id)))
            break;
      if (id < 32)
         break;
   }
   if (i * 32 + id > set->last[f])
      return FALSE;

   set->bits[f][i] |= m << id;

   id += i * 32;

   set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);

   id >>= nv_type_order(def[0]->reg.type) - 1;

   for (i = 0; i < n; ++i)
      if (def[i]->livei)
         def[i]->reg.id = id++;

   return TRUE;
}

static INLINE void
reg_occupy(struct register_set *set, struct nv_value *val)
{
   int s, id = val->reg.id, f = val->reg.file;
   uint m;

   if (id < 0)
      return;
   s = nv_type_order(val->reg.type) - 1;
   id <<= s;
   m = (1 << (1 << s)) - 1;

   set->bits[f][id / 32] |= m << (id % 32);

   if (set->pc->max_reg[f] < id)
      set->pc->max_reg[f] = id;
}

static INLINE void
reg_release(struct register_set *set, struct nv_value *val)
{
   int s, id = val->reg.id, f = val->reg.file;
   uint m;

   if (id < 0)
      return;

   s = nv_type_order(val->reg.type) - 1;
   id <<= s;
   m = (1 << (1 << s)) - 1;

   set->bits[f][id / 32] &= ~(m << (id % 32));
}

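/* Check whether values a and b may be coalesced into one register: they
 * must live in the same register file and have the same size, and if one
 * of the (representative) joins already has a register assigned, no other
 * value mapped to that register may interfere with the other side.
 */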
static INLINE boolean
join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   int i;
   struct nv_value *val;

   if (a->reg.file != b->reg.file ||
       nv_type_sizeof(a->reg.type) != nv_type_sizeof(b->reg.type))
      return FALSE;

   if (a->join->reg.id == b->join->reg.id)
      return TRUE;

#if 1
   /* either a or b or both have been assigned */

   if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
      return FALSE;
   else
   if (b->join->reg.id >= 0) {
      if (a->join->reg.id >= 0)
         return FALSE;
      val = a;
      a = b;
      b = val;
   }

   for (i = 0; i < ctx->pc->num_values; ++i) {
      val = &ctx->pc->values[i];

      if (val->join->reg.id != a->join->reg.id)
         continue;
      if (val->join != a->join && livei_have_overlap(val->join, b->join))
         return FALSE;
   }
   return TRUE;
#endif
   return FALSE;
}

static INLINE void
do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   int j;
   struct nv_value *bjoin = b->join;

   if (b->join->reg.id >= 0)
      a->join->reg.id = b->join->reg.id;

   livei_unify(a->join, b->join);

#ifdef NV50_RA_DEBUG_JOIN
   debug_printf("joining %i to %i\n", b->n, a->n);
#endif

   /* make a->join the new representative */
   for (j = 0; j < ctx->pc->num_values; ++j)
      if (ctx->pc->values[j].join == bjoin)
         ctx->pc->values[j].join = a->join;

   assert(b->join == a->join);
}

static INLINE void
try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   if (!join_allowed(ctx, a, b)) {
#ifdef NV50_RA_DEBUG_JOIN
      debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
#endif
      return;
   }
   if (livei_have_overlap(a->join, b->join)) {
#ifdef NV50_RA_DEBUG_JOIN
      debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
      livei_print(a);
      livei_print(b);
#endif
      return;
   }

   do_join_values(ctx, a, b);
}

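/* A new else-block is required when the predecessor has two (non
 * loop-leave) outgoing edges and the successor has multiple incoming
 * edges, i.e. the edge p -> b is critical: the phi-resolving MOVs
 * generated below could not be placed in either block without affecting
 * another path.
 */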
static INLINE boolean
need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
{
   int i = 0, n = 0;

   for (; i < 2; ++i)
      if (p->out[i] && p->out_kind[i] != CFG_EDGE_LOOP_LEAVE)
         ++n;

   return (b->num_in > 1) && (n == 2);
}

/* For each operand of each PHI in b, generate a new value by inserting a MOV
 * at the end of the block it comes from and replace the operand with its
 * result. This resolves liveness conflicts and, where a conflict remains,
 * lets that MOV copy the value into the required register.
 */
static int
pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i, *ni;
   struct nv_value *val;
   struct nv_basic_block *p, *pn;
   int n, j;

   b->pass_seq = ctx->pc->pass_seq;

   for (n = 0; n < b->num_in; ++n) {
      p = pn = b->in[n];
      assert(p);

      if (need_new_else_block(b, p)) {
         pn = new_basic_block(ctx->pc);

         if (p->out[0] == b)
            p->out[0] = pn;
         else
            p->out[1] = pn;

         if (p->exit->target == b) /* target to new else-block */
            p->exit->target = pn;

         b->in[n] = pn;

         pn->out[0] = b;
         pn->in[0] = p;
         pn->num_in = 1;
      }
      ctx->pc->current_block = pn;

      for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
         for (j = 0; j < 4 && i->src[j]; ++j) {
            if (nvbb_reachable_by(p, i->src[j]->value->insn->bb, b))
               break;
         }
         if (j >= 4 || !i->src[j])
            continue;
         val = i->src[j]->value;

         ni = new_instruction(ctx->pc, NV_OP_MOV);

         /* TODO: insert instruction at correct position in the first place */
         if (ni->prev && ni->prev->target)
            nv_nvi_permute(ni->prev, ni);

         ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
         ni->def[0]->insn = ni;
         ni->src[0] = new_ref(ctx->pc, val);

         nv_reference(ctx->pc, &i->src[j], ni->def[0]);
      }

      if (pn != p && pn->exit) {
         ctx->pc->current_block = b->in[n ? 0 : 1];
         ni = new_instruction(ctx->pc, NV_OP_BRA);
         ni->target = b;
         ni->is_terminator = 1;
      }
   }

   for (j = 0; j < 2; ++j)
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
         pass_generate_phi_movs(ctx, b->out[j]);

   return 0;
}

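/* Coalesce values that should share a register. Run twice: iteration 0
 * joins each def of a vector op (TEX and friends) with its corresponding
 * source, presumably because the hardware reads and writes those in the
 * same consecutive registers; iteration 1 handles PHI, MOV and SELECT,
 * where coalescing just avoids copies.
 */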
static int
pass_join_values(struct nv_pc_pass *ctx, int iter)
{
   int c, n;

   for (n = 0; n < ctx->num_insns; ++n) {
      struct nv_instruction *i = ctx->insns[n];

      switch (i->opcode) {
      case NV_OP_PHI:
         if (!iter)
            continue;
         try_join_values(ctx, i->src[0]->value, i->src[1]->value);
         try_join_values(ctx, i->def[0], i->src[0]->value);
         break;
      case NV_OP_MOV:
         if (iter && i->src[0]->value->insn &&
             !nv_is_vector_op(i->src[0]->value->join->insn->opcode))
            try_join_values(ctx, i->def[0], i->src[0]->value);
         break;
      case NV_OP_SELECT:
         if (!iter)
            break;
         assert(join_allowed(ctx, i->def[0], i->src[0]->value));
         assert(join_allowed(ctx, i->def[0], i->src[1]->value));
         do_join_values(ctx, i->def[0], i->src[0]->value);
         do_join_values(ctx, i->def[0], i->src[1]->value);
         break;
      case NV_OP_TEX:
      case NV_OP_TXB:
      case NV_OP_TXL:
      case NV_OP_TXQ:
         if (iter)
            break;
         for (c = 0; c < 4; ++c) {
            if (!i->src[c])
               break;
            do_join_values(ctx, i->def[c], i->src[c]->value);
         }
         break;
      default:
         break;
      }
   }
   return 0;
}

/* Order the instructions so that live intervals can be expressed in numbers. */
static void
pass_order_instructions(void *priv, struct nv_basic_block *b)
{
   struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
   struct nv_instruction *i;

   b->pass_seq = ctx->pc->pass_seq;

   assert(!b->exit || !b->exit->next);
   for (i = b->phi; i; i = i->next) {
      i->serial = ctx->num_insns;
      ctx->insns[ctx->num_insns++] = i;
   }
}

static void
bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
{
#ifdef NV50_RA_DEBUG_LIVE_SETS
   int j;
   struct nv_value *val;

   debug_printf("LIVE-INs of BB:%i: ", b->id);

   for (j = 0; j < pc->num_values; ++j) {
      if (!(b->live_set[j / 32] & (1 << (j % 32))))
         continue;
      val = &pc->values[j];
      if (!val->insn)
         continue;
      debug_printf("%i ", val->n);
   }
   debug_printf("\n");
#endif
}

static INLINE void
live_set_add(struct nv_basic_block *b, struct nv_value *val)
{
   if (!val->insn) /* don't add non-def values */
      return;
   b->live_set[val->n / 32] |= 1 << (val->n % 32);
}

static INLINE void
live_set_rem(struct nv_basic_block *b, struct nv_value *val)
{
   b->live_set[val->n / 32] &= ~(1 << (val->n % 32));
}

static INLINE boolean
live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
{
   int n = ref->value->n;
   return b->live_set[n / 32] & (1 << (n % 32));
}

/* The live set of a block contains those values that are live immediately
 * before the beginning of the block, so do a backwards scan.
 */
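/* Standard backward dataflow: a block's live-ins are the union of its
 * successors' live-ins, minus values defined here, plus values used here.
 * PHI operands are special-cased: an operand is only live along the edge
 * from the predecessor it belongs to. The caller repeats this pass up to
 * pc->loop_nesting_bound times so that liveness propagates around loop
 * back-edges.
 */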
static int
pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i;
   int j, n, ret = 0;

   debug_printf("pass_build_live_sets BB:%i\n", b->id);

   if (b->pass_seq >= ctx->pc->pass_seq) {
      debug_printf("already visited\n");
      return 0;
   }
   b->pass_seq = ctx->pc->pass_seq;

   /* slight hack for undecidedness: set phi = entry if it's undefined */
   if (!b->phi)
      b->phi = b->entry;

   for (n = 0; n < 2; ++n) {
      if (!b->out[n] || b->out[n] == b)
         continue;
      ret = pass_build_live_sets(ctx, b->out[n]);
      if (ret)
         return ret;

      if (n == 0) {
         for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
            b->live_set[j] = b->out[n]->live_set[j];
      } else {
         for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
            b->live_set[j] |= b->out[n]->live_set[j];
      }

      /* Kick values out of our live set that are created in incoming
       * blocks of our successors that are not us.
       */
      for (i = b->out[n]->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
         for (j = 0; j < 4; ++j) {
            if (!i->src[j])
               break;
            assert(i->src[j]->value->insn);

            if (nvbb_reachable_by(b, i->src[j]->value->insn->bb, b->out[n])) {
               live_set_add(b, i->src[j]->value);
               debug_printf("BB:%i liveset + %i\n", b->id, i->src[j]->value->n);
            } else {
               live_set_rem(b, i->src[j]->value);
               debug_printf("BB:%i liveset - %i\n", b->id, i->src[j]->value->n);
            }
         }
      }
   }

   if (!b->entry)
      return 0;

   bb_live_set_print(ctx->pc, b);

   for (i = b->exit; i; i = i->prev) {
      for (j = 0; j < 4; j++) {
         if (!i->def[j])
            break;
         live_set_rem(b, i->def[j]);
      }
      for (j = 0; j < 4; j++) {
         if (!i->src[j])
            break;
         live_set_add(b, i->src[j]->value);
      }
      if (i->src[4])
         live_set_add(b, i->src[4]->value);
      if (i->flags_def)
         live_set_rem(b, i->flags_def);
      if (i->flags_src)
         live_set_add(b, i->flags_src->value);
   }
   bb_live_set_print(ctx->pc, b);

   return 0;
}

static void collect_live_values(struct nv_basic_block *b, const int n)
{
   int i;

   if (b->out[0]) {
      if (b->out[1]) { /* what to do about back-edges ? */
         for (i = 0; i < n; ++i)
            b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
      } else {
         memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
      }
   } else
   if (b->out[1]) {
      memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
   } else {
      memset(b->live_set, 0, n * sizeof(uint32_t));
   }
}

/* NOTE: the live intervals of phi functions start at the first non-phi instruction */
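/* Walk each block backwards from its exit: encountering a use of a value
 * that is not yet live opens a range from the block entry (or from the
 * value's def, if it lies in this block) up to that use; encountering the
 * def drops the value from the live set. Live-outs of a block with an
 * exit instruction are extended to just past the exit.
 */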
static int
pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i, *i_stop;
   int j, s;
   const int n = (ctx->pc->num_values + 31) / 32;

   debug_printf("building intervals for BB %i\n", b->id);

   /* verify that first block does not have live-in values */
   if (b->num_in == 0)
      for (j = 0; j < n; ++j)
         assert(b->live_set[j] == 0);

   collect_live_values(b, n);

   /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
   for (j = 0; j < 2; ++j) {
      if (!b->out[j] || !b->out[j]->phi)
         continue;
      for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
         live_set_rem(b, i->def[0]);

         for (s = 0; s < 4; ++s) {
            if (!i->src[s])
               break;
            assert(i->src[s]->value->insn);
            if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
               live_set_add(b, i->src[s]->value);
            else
               live_set_rem(b, i->src[s]->value);
         }
      }
   }

   /* remaining live-outs are live until the end */
   if (b->exit) {
      for (j = 0; j < ctx->pc->num_values; ++j) {
         if (!(b->live_set[j / 32] & (1 << (j % 32))))
            continue;
#ifdef NV50_RA_DEBUG_LIVEI
         debug_printf("adding range for live value %i\n", j);
#endif
         add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
      }
   }
   debug_printf("%s: looping through instructions now\n", __func__);

   i_stop = b->entry ? b->entry->prev : NULL;

   /* don't have to include phi functions here (will have 0 live range) */
   for (i = b->exit; i != i_stop; i = i->prev) {
      assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
      for (j = 0; j < 4; ++j) {
         if (i->def[j])
            live_set_rem(b, i->def[j]);
      }
      if (i->flags_def)
         live_set_rem(b, i->flags_def);

      for (j = 0; j < 5; ++j) {
         if (i->src[j] && !live_set_test(b, i->src[j])) {
            live_set_add(b, i->src[j]->value);
#ifdef NV50_RA_DEBUG_LIVEI
            debug_printf("adding range for source that ends living: %i\n",
                         i->src[j]->value->n);
#endif
            add_range(i->src[j]->value, b, i->serial);
         }
      }
      if (i->flags_src && !live_set_test(b, i->flags_src)) {
         live_set_add(b, i->flags_src->value);
#ifdef NV50_RA_DEBUG_LIVEI
         debug_printf("adding range for source that ends living: %i\n",
                      i->flags_src->value->n);
#endif
         add_range(i->flags_src->value, b, i->serial);
      }
   }

   b->pass_seq = ctx->pc->pass_seq;

   if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
      pass_build_intervals(ctx, b->out[0]);

   if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
      pass_build_intervals(ctx, b->out[1]);

   return 0;
}

static INLINE void
nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set)
{
   memset(set, 0, sizeof(*set));

   set->last[NV_FILE_GPR] = 255;
   set->last[NV_FILE_OUT] = 127;
   set->last[NV_FILE_FLAGS] = 4;
   set->last[NV_FILE_ADDR] = 4;

   set->pc = pc;
}

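/* Insert nval into the circular list, keeping it sorted by increasing
 * start of the first live range, as required by the linear scan below.
 */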
static void
insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
{
   struct nv_value *elem = list->prev;

   // debug_printf("inserting value %i\n", nval->n);

   for (elem = list->prev;
        elem != list && elem->livei->bgn > nval->livei->bgn;
        elem = elem->prev);
   /* now elem begins before or at the same time as nval */

   nval->prev = elem;
   nval->next = elem->next;
   elem->next->prev = nval;
   elem->next = nval;
}

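/* Linear-scan allocation over the live intervals built above. Values are
 * processed in order of interval start; "active" values currently occupy
 * a register, "inactive" values sit in a lifetime hole at the current
 * position, and registers are picked from a register_set snapshot that
 * also accounts for overlapping inactive and pre-colored values. There is
 * no spilling: running out of registers aborts.
 */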
static int
pass_linear_scan(struct nv_pc_pass *ctx, int iter)
{
   struct nv_instruction *i;
   struct register_set f, free;
   int k, n;
   struct nv_value *cur, *val, *tmp[2];
   struct nv_value active, inactive, handled, unhandled;

   make_empty_list(&active);
   make_empty_list(&inactive);
   make_empty_list(&handled);
   make_empty_list(&unhandled);

   nv50_ctor_register_set(ctx->pc, &free);

   /* joined values should have range = NULL and thus not be added;
    * also, fixed memory values won't be added because they're not
    * def'd, just used
    */
   for (n = 0; n < ctx->num_insns; ++n) {
      i = ctx->insns[n];

      for (k = 0; k < 4; ++k) {
         if (i->def[k] && i->def[k]->livei)
            insert_ordered_tail(&unhandled, i->def[k]);
         else
         if (0 && i->def[k])
            debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
      }
      if (i->flags_def && i->flags_def->livei)
         insert_ordered_tail(&unhandled, i->flags_def);
   }

   for (val = unhandled.next; val != unhandled.prev; val = val->next) {
      assert(val->join == val);
      assert(val->livei->bgn <= val->next->livei->bgn);
   }

   foreach_s(cur, tmp[0], &unhandled) {
      remove_from_list(cur);

      /* debug_printf("handling value %i\n", cur->n); */

      foreach_s(val, tmp[1], &active) {
         if (livei_end(val) <= cur->livei->bgn) {
            reg_release(&free, val);
            move_to_head(&handled, val);
         } else
         if (!livei_contains(val, cur->livei->bgn)) {
            reg_release(&free, val);
            move_to_head(&inactive, val);
         }
      }

      foreach_s(val, tmp[1], &inactive) {
         if (livei_end(val) <= cur->livei->bgn)
            move_to_head(&handled, val);
         else
         if (livei_contains(val, cur->livei->bgn)) {
            reg_occupy(&free, val);
            move_to_head(&active, val);
         }
      }

      f = free;

      foreach(val, &inactive)
         if (livei_have_overlap(val, cur))
            reg_occupy(&f, val);

      foreach(val, &unhandled)
         if (val->reg.id >= 0 && livei_have_overlap(val, cur))
            reg_occupy(&f, val);

      if (cur->reg.id < 0) {
         boolean mem = FALSE;

         if (nv_is_vector_op(cur->insn->opcode))
            mem = !reg_assign(&f, &cur->insn->def[0], 4);
         else
         if (iter)
            mem = !reg_assign(&f, &cur, 1);

         if (mem) {
            NOUVEAU_ERR("out of registers\n");
            abort();
         }
      }
      insert_at_head(&active, cur);
      reg_occupy(&free, cur);
   }

   return 0;
}

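/* Register allocation, in order: split critical edges and insert MOVs for
 * PHI operands, build live sets (repeated per loop nesting level so that
 * liveness reaches back-edges), number the instructions, build live
 * intervals, then coalesce and linear-scan twice (vector ops first, the
 * rest in the second round).
 */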
int
nv_pc_exec_pass1(struct nv_pc *pc)
{
   struct nv_pc_pass *ctx;
   int i, ret;

   debug_printf("REGISTER ALLOCATION - entering\n");

   ctx = CALLOC_STRUCT(nv_pc_pass);
   if (!ctx)
      return -1;
   ctx->pc = pc;

   nv_print_program(ctx->pc->root);

   ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));

   pc->pass_seq++;
   ret = pass_generate_phi_movs(ctx, pc->root);
   assert(!ret);

   nv_print_program(ctx->pc->root);

   for (i = 0; i < pc->loop_nesting_bound; ++i) {
      pc->pass_seq++;
      ret = pass_build_live_sets(ctx, pc->root);
      assert(!ret && "live sets");
      if (ret) {
         NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
         goto out;
      }
   }

   pc->pass_seq++;
   nv_pc_pass_in_order(pc->root, pass_order_instructions, ctx);

   pc->pass_seq++;
   ret = pass_build_intervals(ctx, pc->root);
   assert(!ret && "build intervals");
   if (ret) {
      NOUVEAU_ERR("failed to build live intervals\n");
      goto out;
   }

#ifdef NV50_RA_DEBUG_LIVEI
   for (i = 0; i < pc->num_values; ++i)
      livei_print(&pc->values[i]);
#endif

   for (i = 0; i < 2; ++i) {
      ret = pass_join_values(ctx, i);
      if (ret)
         goto out;
      ret = pass_linear_scan(ctx, i);
      if (ret)
         goto out;
   }
   assert(!ret && "joining");

   for (i = 0; i < pc->num_values; ++i)
      livei_release(&pc->values[i]);

   debug_printf("REGISTER ALLOCATION - leaving\n");
   nv_print_program(ctx->pc->root);

out:
   FREE(ctx);
   return ret;
}