nv50: fix for GPR allocation granularity being 16 bit
[mesa.git] / src / gallium / drivers / nv50 / nv50_pc_regalloc.c
/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* #define NV50PC_DEBUG */

/* #define NV50_RA_DEBUG_LIVEI */
/* #define NV50_RA_DEBUG_LIVE_SETS */
/* #define NV50_RA_DEBUG_JOIN */

#include "nv50_context.h"
#include "nv50_pc.h"

#include "util/u_simple_list.h"

#define NUM_REGISTER_FILES 4
#define MAX_REGISTER_COUNT 256

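/* GPRs are tracked at 16-bit granularity: each bit in the sets below stands
 * for one 16-bit half-register, so a 32-bit value occupies 2 bits, and the
 * 256 allocation units cover the 128 full GPRs.
 */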
struct register_set {
   struct nv_pc *pc;

   uint32_t last[NUM_REGISTER_FILES];
   uint32_t bits[NUM_REGISTER_FILES][(MAX_REGISTER_COUNT + 31) / 32];
};

/* A set bit means occupied/unavailable, so the intersection of two sets of
 * available registers is the OR of their bits; aliasing is allowed.
 */
static void
intersect_register_sets(struct register_set *dst,
                        struct register_set *src1, struct register_set *src2)
{
   int i, j;

   for (i = 0; i < NUM_REGISTER_FILES; ++i) {
      for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j)
         dst->bits[i][j] = src1->bits[i][j] | src2->bits[i][j];
   }
}

static void
mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask)
{
   int i, j;

   for (i = 0; i < NUM_REGISTER_FILES; ++i) {
      for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j)
         set->bits[i][j] = (set->bits[i][j] | mask) & umask;
   }
}

struct nv_pc_pass {
   struct nv_pc *pc;

   struct nv_instruction **insns;
   int num_insns;

   uint pass_seq;
};

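/* Live intervals are kept as sorted lists of disjoint [bgn, end) ranges;
 * after extending a range we may have to merge it with its successors.
 */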
static void
ranges_coalesce(struct nv_range *range)
{
   while (range->next && range->end >= range->next->bgn) {
      struct nv_range *rnn = range->next->next;
      assert(range->bgn <= range->next->bgn);
      range->end = MAX2(range->end, range->next->end);
      FREE(range->next);
      range->next = rnn;
   }
}

/* @return: TRUE if @new_range can be freed (i.e. was not reused) */
static boolean
add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
{
   struct nv_range *range, **nextp = &val->livei;

   if (bgn == end) /* [a, a) is invalid / empty */
      return TRUE;

   for (range = val->livei; range; range = range->next) {
      if (end < range->bgn)
         break; /* insert before */

      if (bgn > range->end) {
         nextp = &range->next;
         continue; /* insert after */
      }

      /* overlap */
      if (bgn < range->bgn) {
         range->bgn = bgn;
         if (end > range->end)
            range->end = end;
         ranges_coalesce(range);
         return TRUE;
      }
      if (end > range->end) {
         range->end = end;
         ranges_coalesce(range);
         return TRUE;
      }
      assert(bgn >= range->bgn);
      assert(end <= range->end);
      return TRUE;
   }

   if (!new_range)
      new_range = CALLOC_STRUCT(nv_range);

   new_range->bgn = bgn;
   new_range->end = end;
   new_range->next = range;
   *(nextp) = new_range;
   return FALSE;
}

static void
add_range(struct nv_value *val, struct nv_basic_block *b, int end)
{
   int bgn;

   if (!val->insn) /* ignore non-def values */
      return;
   assert(b->entry->serial <= b->exit->serial);
   assert(b->phi->serial <= end);
   assert(b->exit->serial + 1 >= end);

   bgn = val->insn->serial;
   if (bgn < b->entry->serial || bgn > b->exit->serial)
      bgn = b->entry->serial;

   assert(bgn <= end);

   add_range_ex(val, bgn, end, NULL);
}

#if defined(NV50_RA_DEBUG_JOIN) || defined(NV50_RA_DEBUG_LIVEI)
static void
livei_print(struct nv_value *a)
{
   struct nv_range *r = a->livei;

   debug_printf("livei %i: ", a->n);
   while (r) {
      debug_printf("[%i, %i) ", r->bgn, r->end);
      r = r->next;
   }
   debug_printf("\n");
}
#endif

static void
livei_unify(struct nv_value *dst, struct nv_value *src)
{
   struct nv_range *range, *next;

   for (range = src->livei; range; range = next) {
      next = range->next;
      if (add_range_ex(dst, range->bgn, range->end, range))
         FREE(range);
   }
   src->livei = NULL;
}

static void
livei_release(struct nv_value *val)
{
   struct nv_range *range, *next;

   for (range = val->livei; range; range = next) {
      next = range->next;
      FREE(range);
   }
}

static boolean
livei_have_overlap(struct nv_value *a, struct nv_value *b)
{
   struct nv_range *r_a, *r_b;

   for (r_a = a->livei; r_a; r_a = r_a->next) {
      for (r_b = b->livei; r_b; r_b = r_b->next) {
         if (r_b->bgn < r_a->end &&
             r_b->end > r_a->bgn)
            return TRUE;
      }
   }
   return FALSE;
}

static int
livei_end(struct nv_value *a)
{
   struct nv_range *r = a->livei;

   assert(r);
   while (r->next)
      r = r->next;
   return r->end;
}

static boolean
livei_contains(struct nv_value *a, int pos)
{
   struct nv_range *r;

   for (r = a->livei; r && r->bgn <= pos; r = r->next)
      if (r->end > pos)
         return TRUE;
   return FALSE;
}

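/* Find a run of free allocation units in file @f for the n values in @def
 * and mark it occupied. Each bit stands for one 16-bit unit, so a single
 * value takes (1 << (nv_type_order(type) - 1)) bits; the run of s bits is
 * searched at naturally aligned offsets within each 32-bit word, and the
 * resulting id is converted back into units of the value's own size.
 */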
static boolean
reg_assign(struct register_set *set, struct nv_value **def, int n)
{
   int i, id, s;
   uint m;
   int f = def[0]->reg.file;

   s = n << (nv_type_order(def[0]->reg.type) - 1);
   m = (1 << s) - 1;

   id = set->last[f];

   for (i = 0; i * 32 < set->last[f]; ++i) {
      if (set->bits[f][i] == 0xffffffff)
         continue;

      for (id = 0; id < 32; id += s)
         if (!(set->bits[f][i] & (m << id)))
            break;
      if (id < 32)
         break;
   }
   if (i * 32 + id > set->last[f])
      return FALSE;

   set->bits[f][i] |= m << id;

   id += i * 32;

   set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);

   id >>= nv_type_order(def[0]->reg.type) - 1;

   for (i = 0; i < n; ++i)
      if (def[i]->livei)
         def[i]->reg.id = id++;

   return TRUE;
}

static INLINE void
reg_occupy(struct register_set *set, struct nv_value *val)
{
   int s, id = val->reg.id, f = val->reg.file;
   uint m;

   if (id < 0)
      return;
   s = nv_type_order(val->reg.type) - 1;
   id <<= s;
   m = (1 << (1 << s)) - 1;

   assert(s >= 0); /* XXX: remove me */

   set->bits[f][id / 32] |= m << (id % 32);

   if (set->pc->max_reg[f] < id)
      set->pc->max_reg[f] = id;
}

static INLINE void
reg_release(struct register_set *set, struct nv_value *val)
{
   int s, id = val->reg.id, f = val->reg.file;
   uint m;

   if (id < 0)
      return;

   s = nv_type_order(val->reg.type) - 1;
   id <<= s;
   m = (1 << (1 << s)) - 1;

   set->bits[f][id / 32] &= ~(m << (id % 32));
}

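/* Coalescing: joined values share one register. Each value points to the
 * representative of its equivalence class via val->join (val->join == val
 * if unjoined); live intervals and register ids are checked and merged on
 * the representatives only.
 */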
static INLINE boolean
join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   int i;
   struct nv_value *val;

   if (a->reg.file != b->reg.file ||
       nv_type_sizeof(a->reg.type) != nv_type_sizeof(b->reg.type))
      return FALSE;

   if (a->join->reg.id == b->join->reg.id)
      return TRUE;

   /* either a or b or both have been assigned */

   if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
      return FALSE;
   else
   if (b->join->reg.id >= 0) {
      val = a;
      a = b;
      b = val;
   }

   for (i = 0; i < ctx->pc->num_values; ++i) {
      val = &ctx->pc->values[i];

      if (val->join->reg.id != a->join->reg.id)
         continue;
      if (val->join != a->join && livei_have_overlap(val->join, b->join))
         return FALSE;
   }
   return TRUE;
}

static INLINE void
do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   int j;
   struct nv_value *bjoin = b->join;

   if (b->join->reg.id >= 0)
      a->join->reg.id = b->join->reg.id;

   livei_unify(a->join, b->join);

#ifdef NV50_RA_DEBUG_JOIN
   debug_printf("joining %i to %i\n", b->n, a->n);
#endif

   /* make a->join the new representative */
   for (j = 0; j < ctx->pc->num_values; ++j)
      if (ctx->pc->values[j].join == bjoin)
         ctx->pc->values[j].join = a->join;

   assert(b->join == a->join);
}

static INLINE boolean
try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   if (!join_allowed(ctx, a, b)) {
#ifdef NV50_RA_DEBUG_JOIN
      debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
#endif
      return FALSE;
   }
   if (livei_have_overlap(a->join, b->join)) {
#ifdef NV50_RA_DEBUG_JOIN
      debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
      livei_print(a);
      livei_print(b);
#endif
      return FALSE;
   }

   do_join_values(ctx, a, b);

   return TRUE;
}

static void
join_values_nofail(struct nv_pc_pass *ctx,
                   struct nv_value *a, struct nv_value *b, boolean type_only)
{
   if (type_only) {
      assert(join_allowed(ctx, a, b));
      do_join_values(ctx, a, b);
   } else {
      boolean ok = try_join_values(ctx, a, b);
      if (!ok) {
         NOUVEAU_ERR("failed to coalesce values\n");
      }
   }
}

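/* A predecessor with two (non-loop) outgoing edges into a block with more
 * than one incoming edge forms a critical edge: the MOVs inserted for phi
 * sources need a block of their own on that edge so they execute only when
 * control actually flows along it.
 */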
static INLINE boolean
need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
{
   int i = 0, n = 0;

   for (; i < 2; ++i)
      if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))
         ++n;

   return (b->num_in > 1) && (n == 2);
}

/* Look for the @phi's operand whose definition reaches @b. */
static int
phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
                struct nv_basic_block *tb)
{
   struct nv_ref *srci, *srcj;
   int i, j;

   for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
      srci = phi->src[i];
      /* if already replaced, check with original source first */
      if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV)
         srci = srci->value->insn->src[0];
      if (!nvbb_reachable_by(b, srci->value->insn->bb, NULL))
         continue;
      /* NOTE: back-edges are ignored by the reachable-by check */
      if (j < 0 || !nvbb_reachable_by(srcj->value->insn->bb,
                                      srci->value->insn->bb, NULL)) {
         j = i;
         srcj = srci;
      }
   }
   if (j >= 0 && nvbb_reachable_by(b, phi->def[0]->insn->bb, NULL))
      if (!nvbb_reachable_by(srcj->value->insn->bb,
                             phi->def[0]->insn->bb, NULL))
         j = -1;
   return j;
}

/* For each operand of each PHI in b, generate a new value by inserting a MOV
 * at the end of the block it is coming from and replace the operand with its
 * result. This eliminates liveness conflicts and enables us to let values be
 * copied to the right register if such a conflict exists nonetheless.
 *
 * These MOVs are also crucial in making sure the live intervals of phi
 * sources are extended until the end of the loop, since they are not
 * included in the live-in sets.
 */
static int
pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i, *ni;
   struct nv_value *val;
   struct nv_basic_block *p, *pn;
   int n, j;

   b->pass_seq = ctx->pc->pass_seq;

   for (n = 0; n < b->num_in; ++n) {
      p = pn = b->in[n];
      assert(p);

      if (need_new_else_block(b, p)) {
         pn = new_basic_block(ctx->pc);

         if (p->out[0] == b)
            p->out[0] = pn;
         else
            p->out[1] = pn;

         if (p->exit->target == b) /* target to new else-block */
            p->exit->target = pn;

         b->in[n] = pn;

         pn->out[0] = b;
         pn->in[0] = p;
         pn->num_in = 1;
      }
      ctx->pc->current_block = pn;

      for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
         j = phi_opnd_for_bb(i, p, b);

         if (j < 0) {
            val = i->def[0];
         } else {
            val = i->src[j]->value;
            if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) {
               j = -1;
               /* use original value, we already encountered & replaced it */
               val = val->insn->src[0]->value;
            }
         }
         if (j < 0) /* need an additional source ? */
            for (j = 0; j < 5 && i->src[j] && i->src[j]->value != val; ++j);
         assert(j < 5);

         ni = new_instruction(ctx->pc, NV_OP_MOV);

         /* TODO: insert instruction at correct position in the first place */
         if (ni->prev && ni->prev->target)
            nv_nvi_permute(ni->prev, ni);

         ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
         ni->def[0]->insn = ni;
         ni->src[0] = new_ref(ctx->pc, val);

         nv_reference(ctx->pc, &i->src[j], ni->def[0]);

         i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV;
      }

      if (pn != p && pn->exit) {
         ctx->pc->current_block = b->in[n ? 0 : 1];
         ni = new_instruction(ctx->pc, NV_OP_BRA);
         ni->target = b;
         ni->is_terminator = 1;
      }
   }

   for (j = 0; j < 2; ++j)
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
         pass_generate_phi_movs(ctx, b->out[j]);

   return 0;
}

#define JOIN_MASK_PHI    (1 << 0)
#define JOIN_MASK_SELECT (1 << 1)
#define JOIN_MASK_MOV    (1 << 2)
#define JOIN_MASK_TEX    (1 << 3)

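/* Coalesce values so that they end up in the same register and the MOVs
 * between them become no-ops. nv_pc_pass1 calls this with JOIN_MASK_PHI
 * first, then SELECT/TEX, then MOV: the PHI/SELECT/TEX joins are needed for
 * correctness, while the MOV joins are only a copy-elimination optimization.
 */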
static int
pass_join_values(struct nv_pc_pass *ctx, unsigned mask)
{
   int c, n;

   for (n = 0; n < ctx->num_insns; ++n) {
      struct nv_instruction *nvi, *i = ctx->insns[n];

      switch (i->opcode) {
      case NV_OP_PHI:
         if (!(mask & JOIN_MASK_PHI))
            break;
         for (c = 0; c < 5 && i->src[c]; ++c)
            join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE);
         break;
      case NV_OP_MOV:
         if (!(mask & JOIN_MASK_MOV))
            break;
         nvi = i->src[0]->value->join->insn;
         if (nvi && !nv_is_vector_op(nvi->opcode))
            try_join_values(ctx, i->def[0], i->src[0]->value);
         break;
      case NV_OP_SELECT:
         if (!(mask & JOIN_MASK_SELECT))
            break;
         for (c = 0; c < 5 && i->src[c]; ++c)
            join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE);
         break;
      case NV_OP_TEX:
      case NV_OP_TXB:
      case NV_OP_TXL:
      case NV_OP_TXQ:
         if (!(mask & JOIN_MASK_TEX))
            break;
         /* This should work without conflicts because we always generate
          * extra MOVs for the sources of a TEX.
          */
         for (c = 0; c < 4 && i->src[c]; ++c)
            join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE);
         break;
      default:
         break;
      }
   }
   return 0;
}

/* Assign each instruction a serial number, so that live intervals can be
 * expressed as ranges of those numbers.
 */
static void
pass_order_instructions(void *priv, struct nv_basic_block *b)
{
   struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
   struct nv_instruction *i;

   b->pass_seq = ctx->pc->pass_seq;

   assert(!b->exit || !b->exit->next);
   for (i = b->phi; i; i = i->next) {
      i->serial = ctx->num_insns;
      ctx->insns[ctx->num_insns++] = i;
   }
}

static void
bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
{
#ifdef NV50_RA_DEBUG_LIVE_SETS
   int j;
   struct nv_value *val;

   debug_printf("LIVE-INs of BB:%i: ", b->id);

   for (j = 0; j < pc->num_values; ++j) {
      if (!(b->live_set[j / 32] & (1 << (j % 32))))
         continue;
      val = &pc->values[j];
      if (!val->insn)
         continue;
      debug_printf("%i ", val->n);
   }
   debug_printf("\n");
#endif
}

static INLINE void
live_set_add(struct nv_basic_block *b, struct nv_value *val)
{
   if (!val->insn) /* don't add non-def values */
      return;
   b->live_set[val->n / 32] |= 1 << (val->n % 32);
}

static INLINE void
live_set_rem(struct nv_basic_block *b, struct nv_value *val)
{
   b->live_set[val->n / 32] &= ~(1 << (val->n % 32));
}

static INLINE boolean
live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
{
   int n = ref->value->n;
   return b->live_set[n / 32] & (1 << (n % 32));
}

/* The live set of a block contains those values that are live immediately
 * before the beginning of the block, so do a backwards scan.
 */
static int
pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i;
   int j, n, ret = 0;

   if (b->pass_seq >= ctx->pc->pass_seq)
      return 0;
   b->pass_seq = ctx->pc->pass_seq;

   /* slight hack for undecidedness: set phi = entry if it's undefined */
   if (!b->phi)
      b->phi = b->entry;

   for (n = 0; n < 2; ++n) {
      if (!b->out[n] || b->out[n] == b)
         continue;
      ret = pass_build_live_sets(ctx, b->out[n]);
      if (ret)
         return ret;

      if (n == 0) {
         for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
            b->live_set[j] = b->out[n]->live_set[j];
      } else {
         for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
            b->live_set[j] |= b->out[n]->live_set[j];
      }
   }

   if (!b->entry)
      return 0;

   bb_live_set_print(ctx->pc, b);

   for (i = b->exit; i != b->entry->prev; i = i->prev) {
      for (j = 0; j < 4; j++) {
         if (!i->def[j])
            break;
         live_set_rem(b, i->def[j]);
      }
      for (j = 0; j < 4; j++) {
         if (!i->src[j])
            break;
         live_set_add(b, i->src[j]->value);
      }
      if (i->src[4])
         live_set_add(b, i->src[4]->value);
      if (i->flags_def)
         live_set_rem(b, i->flags_def);
      if (i->flags_src)
         live_set_add(b, i->flags_src->value);
   }
   for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
      live_set_rem(b, i->def[0]);

   bb_live_set_print(ctx->pc, b);

   return 0;
}

static void collect_live_values(struct nv_basic_block *b, const int n)
{
   int i;

   if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) {
      if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
         for (i = 0; i < n; ++i)
            b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
      } else {
         memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
      }
   } else
   if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
      memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
   } else {
      memset(b->live_set, 0, n * sizeof(uint32_t));
   }
}

/* NOTE: the live intervals of phi functions start at the first non-phi insn. */
static int
pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i, *i_stop;
   int j, s;
   const int n = (ctx->pc->num_values + 31) / 32;

   /* verify that first block does not have live-in values */
   if (b->num_in == 0)
      for (j = 0; j < n; ++j)
         assert(b->live_set[j] == 0);

   collect_live_values(b, n);

   /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
   for (j = 0; j < 2; ++j) {
      if (!b->out[j] || !b->out[j]->phi)
         continue;
      for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
         live_set_rem(b, i->def[0]);

         for (s = 0; s < 4; ++s) {
            if (!i->src[s])
               break;
            assert(i->src[s]->value->insn);
            if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
               live_set_add(b, i->src[s]->value);
            else
               live_set_rem(b, i->src[s]->value);
         }
      }
   }

   /* remaining live-outs are live until the end */
   if (b->exit) {
      for (j = 0; j < ctx->pc->num_values; ++j) {
         if (!(b->live_set[j / 32] & (1 << (j % 32))))
            continue;
         add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
#ifdef NV50_RA_DEBUG_LIVEI
         debug_printf("adding range for live value %i: ", j);
         livei_print(&ctx->pc->values[j]);
#endif
      }
   }

   i_stop = b->entry ? b->entry->prev : NULL;

   /* don't have to include phi functions here (will have 0 live range) */
   for (i = b->exit; i != i_stop; i = i->prev) {
      assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
      for (j = 0; j < 4; ++j) {
         if (i->def[j])
            live_set_rem(b, i->def[j]);
      }
      if (i->flags_def)
         live_set_rem(b, i->flags_def);

      for (j = 0; j < 5; ++j) {
         if (i->src[j] && !live_set_test(b, i->src[j])) {
            live_set_add(b, i->src[j]->value);
            add_range(i->src[j]->value, b, i->serial);
#ifdef NV50_RA_DEBUG_LIVEI
            debug_printf("adding range for source %i (ends living): ",
                         i->src[j]->value->n);
            livei_print(i->src[j]->value);
#endif
         }
      }
      if (i->flags_src && !live_set_test(b, i->flags_src)) {
         live_set_add(b, i->flags_src->value);
         add_range(i->flags_src->value, b, i->serial);
#ifdef NV50_RA_DEBUG_LIVEI
         debug_printf("adding range for source %i (ends living): ",
                      i->flags_src->value->n);
         livei_print(i->flags_src->value);
#endif
      }
   }

   b->pass_seq = ctx->pc->pass_seq;

   if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
      pass_build_intervals(ctx, b->out[0]);

   if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
      pass_build_intervals(ctx, b->out[1]);

   return 0;
}

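/* last[] holds the highest allocation unit a value may start at. GPRs are
 * counted in 16-bit halves, hence 255 covers the 128 32-bit GPRs.
 */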
static INLINE void
nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set)
{
   memset(set, 0, sizeof(*set));

   set->last[NV_FILE_GPR] = 255;
   set->last[NV_FILE_OUT] = 127;
   set->last[NV_FILE_FLAGS] = 4;
   set->last[NV_FILE_ADDR] = 4;

   set->pc = pc;
}

static void
insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
{
   struct nv_value *elem;

   for (elem = list->prev;
        elem != list && elem->livei->bgn > nval->livei->bgn;
        elem = elem->prev);
   /* now elem begins before or at the same time as nval */

   nval->prev = elem;
   nval->next = elem->next;
   elem->next->prev = nval;
   elem->next = nval;
}

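/* Gather all values (or, with @assigned_only, all values that already have a
 * register) that represent their equivalence class into a list sorted by the
 * start of their live interval, as the linear scan below requires.
 */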
static void
collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head,
                        boolean assigned_only)
{
   struct nv_value *val;
   int k, n;

   make_empty_list(head);

   for (n = 0; n < ctx->num_insns; ++n) {
      struct nv_instruction *i = ctx->insns[n];

      /* for joined values, only the representative will have livei != NULL */
      for (k = 0; k < 4; ++k) {
         if (i->def[k] && i->def[k]->livei)
            if (!assigned_only || i->def[k]->reg.id >= 0)
               insert_ordered_tail(head, i->def[k]);
      }
      if (i->flags_def && i->flags_def->livei)
         if (!assigned_only || i->flags_def->reg.id >= 0)
            insert_ordered_tail(head, i->flags_def);
   }

   for (val = head->next; val != head->prev; val = val->next) {
      assert(val->join == val);
      assert(val->livei->bgn <= val->next->livei->bgn);
   }
}

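/* Standard linear scan over the values sorted by interval start: "active"
 * values occupy their register at the current position, "inactive" ones are
 * in a hole of their live interval, "handled" ones have expired. Values that
 * were already assigned (e.g. by the constrained pass) keep their register,
 * and everything overlapping @cur is blocked in the candidate set @f.
 */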
static int
pass_linear_scan(struct nv_pc_pass *ctx, int iter)
{
   struct register_set f, free;
   struct nv_value *cur, *val, *tmp[2];
   struct nv_value active, inactive, handled, unhandled;

   make_empty_list(&active);
   make_empty_list(&inactive);
   make_empty_list(&handled);

   nv50_ctor_register_set(ctx->pc, &free);

   collect_register_values(ctx, &unhandled, FALSE);

   foreach_s(cur, tmp[0], &unhandled) {
      remove_from_list(cur);

      foreach_s(val, tmp[1], &active) {
         if (livei_end(val) <= cur->livei->bgn) {
            reg_release(&free, val);
            move_to_head(&handled, val);
         } else
         if (!livei_contains(val, cur->livei->bgn)) {
            reg_release(&free, val);
            move_to_head(&inactive, val);
         }
      }

      foreach_s(val, tmp[1], &inactive) {
         if (livei_end(val) <= cur->livei->bgn)
            move_to_head(&handled, val);
         else
         if (livei_contains(val, cur->livei->bgn)) {
            reg_occupy(&free, val);
            move_to_head(&active, val);
         }
      }

      f = free;

      foreach(val, &inactive)
         if (livei_have_overlap(val, cur))
            reg_occupy(&f, val);

      foreach(val, &unhandled)
         if (val->reg.id >= 0 && livei_have_overlap(val, cur))
            reg_occupy(&f, val);

      if (cur->reg.id < 0) {
         boolean mem = !reg_assign(&f, &cur, 1);

         if (mem) {
            NOUVEAU_ERR("out of registers\n");
            abort();
         }
      }
      insert_at_head(&active, cur);
      reg_occupy(&free, cur);
   }

   return 0;
}

/* Allocate values defined by instructions such as TEX, which have to be
 * assigned to consecutive registers.
 * Linear scan doesn't really work here since the values can have different
 * live intervals.
 */
static int
pass_allocate_constrained_values(struct nv_pc_pass *ctx)
{
   struct nv_value regvals, *val;
   struct nv_instruction *i;
   struct nv_value *defs[4];
   struct register_set regs[4];
   int n, vsize, c;
   uint32_t mask;
   boolean mem;

   collect_register_values(ctx, &regvals, TRUE);

   for (n = 0; n < ctx->num_insns; ++n) {
      i = ctx->insns[n];
      vsize = nvi_vector_size(i);
      if (!(vsize > 1))
         continue;
      assert(vsize <= 4);
      for (c = 0; c < vsize; ++c)
         defs[c] = i->def[c]->join;

      if (defs[0]->reg.id >= 0) {
         for (c = 1; c < vsize; ++c)
            assert(defs[c]->reg.id >= 0);
         continue;
      }

      /* Compute registers available for this "vector" of consecutive
       * registers. Each value (component) has its own independent live
       * interval.
       */
      for (c = 0; c < vsize; ++c) {
         nv50_ctor_register_set(ctx->pc, &regs[c]);

         foreach(val, &regvals) {
            if (val->reg.id >= 0 && livei_have_overlap(val, defs[c]))
               reg_occupy(&regs[c], val);
         }
         /* Only 32 bit GPRs will be allocated here, but register set
          * granularity for GPRs is 16 bit.
          */
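         /* E.g. for a vec4, each 32-bit component occupies a 2-bit slot in
          * an aligned group of 8 bits. Keep only the bits at component c's
          * slot (umask = mask << (c * 2)) and clear the rest, so that ORing
          * the per-component sets below yields a group that is free iff
          * every component's own slot is free.
          */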
         mask = 0x03030303;
         if (vsize == 2) /* granularity is 2 and not 4 */
            mask |= 0x03030303 << 4;
         mask_register_set(&regs[c], 0, mask << (c * 2));

         if (defs[c]->livei)
            insert_ordered_tail(&regvals, defs[c]);
      }
      for (c = 1; c < vsize; ++c)
         intersect_register_sets(&regs[0], &regs[0], &regs[c]);

      mem = !reg_assign(&regs[0], &defs[0], vsize);

      if (mem) {
         NOUVEAU_ERR("out of registers\n");
         abort();
      }
   }
   return 0;
}

static int
nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pc_pass *ctx;
   int i, ret;

   NV50_DBGMSG("REGISTER ALLOCATION - entering\n");

   ctx = CALLOC_STRUCT(nv_pc_pass);
   if (!ctx)
      return -1;
   ctx->pc = pc;

   ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
   if (!ctx->insns) {
      FREE(ctx);
      return -1;
   }

   pc->pass_seq++;
   ret = pass_generate_phi_movs(ctx, root);
   assert(!ret);

   for (i = 0; i < pc->loop_nesting_bound; ++i) {
      pc->pass_seq++;
      ret = pass_build_live_sets(ctx, root);
      assert(!ret && "live sets");
      if (ret) {
         NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
         goto out;
      }
   }

   pc->pass_seq++;
   nv_pc_pass_in_order(root, pass_order_instructions, ctx);

   pc->pass_seq++;
   ret = pass_build_intervals(ctx, root);
   assert(!ret && "build intervals");
   if (ret) {
      NOUVEAU_ERR("failed to build live intervals\n");
      goto out;
   }

#ifdef NV50_RA_DEBUG_LIVEI
   for (i = 0; i < pc->num_values; ++i)
      livei_print(&pc->values[i]);
#endif

   ret = pass_join_values(ctx, JOIN_MASK_PHI);
   if (ret)
      goto out;
   ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_TEX);
   if (ret)
      goto out;
   ret = pass_join_values(ctx, JOIN_MASK_MOV);
   if (ret)
      goto out;
   ret = pass_allocate_constrained_values(ctx);
   if (ret)
      goto out;
   ret = pass_linear_scan(ctx, 1);
   if (ret)
      goto out;

   for (i = 0; i < pc->num_values; ++i)
      livei_release(&pc->values[i]);

   NV50_DBGMSG("REGISTER ALLOCATION - leaving\n");

out:
   FREE(ctx->insns);
   FREE(ctx);
   return ret;
}

int
nv_pc_exec_pass1(struct nv_pc *pc)
{
   int i, ret;

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
         return ret;
   return 0;
}