[g3dvl] move zscan into shaders
[mesa.git] / src / gallium / drivers / nv50 / nv50_pc_regalloc.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #if NV50_DEBUG & NV50_DEBUG_PROG_RA
24 # define NV50_RA_DEBUG_LIVEI
25 # define NV50_RA_DEBUG_LIVE_SETS
26 # define NV50_RA_DEBUG_JOIN
27 #endif
28
29 #include "nv50_context.h"
30 #include "nv50_pc.h"
31
32 #include "util/u_simple_list.h"
33
34 #define NUM_REGISTER_FILES 4
35 #define MAX_REGISTER_COUNT 256
36
/* Tracks register occupancy during allocation, one bitmask per register
 * file. A set bit means occupied/unavailable. Granularity appears to be
 * sub-register (16-bit units for GPRs, see reg_occupy/reg_assign), so a
 * 32-bit value occupies multiple consecutive bits.
 */
struct register_set {
   struct nv_pc *pc;

   uint32_t last[NUM_REGISTER_FILES]; /* highest usable bit index per file */
   uint32_t bits[NUM_REGISTER_FILES][(MAX_REGISTER_COUNT + 31) / 32];
};
43
44 /* using OR because a set bit means occupied/unavailable, aliasing is allowed */
45 static void
46 intersect_register_sets(struct register_set *dst,
47 struct register_set *src1, struct register_set *src2)
48 {
49 int i, j;
50
51 for (i = 0; i < NUM_REGISTER_FILES; ++i) {
52 for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j)
53 dst->bits[i][j] = src1->bits[i][j] | src2->bits[i][j];
54 }
55 }
56
57 static void
58 mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask)
59 {
60 int i, j;
61
62 for (i = 0; i < NUM_REGISTER_FILES; ++i) {
63 for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j)
64 set->bits[i][j] = (set->bits[i][j] | mask) & umask;
65 }
66 }
67
/* Register-allocation pass context: the program's instructions laid out
 * in serial order (filled by pass_order_instructions).
 */
struct nv_pc_pass {
   struct nv_pc *pc;

   struct nv_instruction **insns; /* indexed by nv_instruction serial number */
   int num_insns;

   uint pass_seq; /* NOTE(review): appears unused; passes use pc->pass_seq */
};
76
77 static void
78 ranges_coalesce(struct nv_range *range)
79 {
80 while (range->next && range->end >= range->next->bgn) {
81 struct nv_range *rnn = range->next->next;
82 assert(range->bgn <= range->next->bgn);
83 range->end = MAX2(range->end, range->next->end);
84 FREE(range->next);
85 range->next = rnn;
86 }
87 }
88
/* Insert the half-open interval [bgn, end) into @val's sorted live
 * interval list, merging with overlapping ranges. If a new list node is
 * required and @new_range is non-NULL, it is reused instead of
 * allocating.
 *
 * @return: TRUE if @new_range can be freed (i.e. was not reused)
 */
static boolean
add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
{
   struct nv_range *range, **nextp = &val->livei;

   if (bgn == end) /* [a, a) is invalid / empty */
      return TRUE;

   for (range = val->livei; range; range = range->next) {
      if (end < range->bgn)
         break; /* insert before */

      if (bgn > range->end) {
         nextp = &range->next;
         continue; /* insert after */
      }

      /* overlap */
      if (bgn < range->bgn) {
         /* extend this range downwards (and possibly upwards) */
         range->bgn = bgn;
         if (end > range->end)
            range->end = end;
         ranges_coalesce(range);
         return TRUE;
      }
      if (end > range->end) {
         /* extend this range upwards; may swallow successors */
         range->end = end;
         ranges_coalesce(range);
         return TRUE;
      }
      /* new interval is fully contained in an existing range */
      assert(bgn >= range->bgn);
      assert(end <= range->end);
      return TRUE;
   }

   /* no overlap found: link in a (possibly reused) node before @range */
   if (!new_range)
      new_range = CALLOC_STRUCT(nv_range);

   new_range->bgn = bgn;
   new_range->end = end;
   new_range->next = range;
   *(nextp) = new_range;
   return FALSE;
}
134
/* Extend the live interval of @val inside basic block @b up to serial
 * position @end. The interval begins at the defining instruction if it
 * lies within @b, otherwise at the block entry (value is live-in).
 */
static void
add_range(struct nv_value *val, struct nv_basic_block *b, int end)
{
   int bgn;

   if (!val->insn) /* ignore non-def values */
      return;
   assert(b->entry->serial <= b->exit->serial);
   assert(b->phi->serial <= end);
   assert(b->exit->serial + 1 >= end);

   bgn = val->insn->serial;
   if (bgn < b->entry->serial || bgn > b->exit->serial)
      bgn = b->entry->serial; /* defined outside @b: live from block start */

   assert(bgn <= end);

   add_range_ex(val, bgn, end, NULL);
}
154
155 #if defined(NV50_RA_DEBUG_JOIN) || defined(NV50_RA_DEBUG_LIVEI)
156 static void
157 livei_print(struct nv_value *a)
158 {
159 struct nv_range *r = a->livei;
160
161 debug_printf("livei %i: ", a->n);
162 while (r) {
163 debug_printf("[%i, %i) ", r->bgn, r->end);
164 r = r->next;
165 }
166 debug_printf("\n");
167 }
168 #endif
169
/* Merge all live ranges of @src into @dst (used when joining values).
 * add_range_ex takes ownership of nodes it links in (returns FALSE);
 * nodes it merged into existing ranges instead are freed here.
 */
static void
livei_unify(struct nv_value *dst, struct nv_value *src)
{
   struct nv_range *range, *next;

   for (range = src->livei; range; range = next) {
      next = range->next;
      if (add_range_ex(dst, range->bgn, range->end, range))
         FREE(range);
   }
   src->livei = NULL;
}
182
183 static void
184 livei_release(struct nv_value *val)
185 {
186 struct nv_range *range, *next;
187
188 for (range = val->livei; range; range = next) {
189 next = range->next;
190 FREE(range);
191 }
192 }
193
194 static boolean
195 livei_have_overlap(struct nv_value *a, struct nv_value *b)
196 {
197 struct nv_range *r_a, *r_b;
198
199 for (r_a = a->livei; r_a; r_a = r_a->next) {
200 for (r_b = b->livei; r_b; r_b = r_b->next) {
201 if (r_b->bgn < r_a->end &&
202 r_b->end > r_a->bgn)
203 return TRUE;
204 }
205 }
206 return FALSE;
207 }
208
209 static int
210 livei_end(struct nv_value *a)
211 {
212 struct nv_range *r = a->livei;
213
214 assert(r);
215 while (r->next)
216 r = r->next;
217 return r->end;
218 }
219
220 static boolean
221 livei_contains(struct nv_value *a, int pos)
222 {
223 struct nv_range *r;
224
225 for (r = a->livei; r && r->bgn <= pos; r = r->next)
226 if (r->end > pos)
227 return TRUE;
228 return FALSE;
229 }
230
/* Find and reserve a contiguous block of free registers for the @n
 * values def[0..n-1] (all of the same file and type) and assign ids.
 *
 * @return: FALSE if no suitable free slot exists in the file
 */
static boolean
reg_assign(struct register_set *set, struct nv_value **def, int n)
{
   int i, id, s;
   uint m;
   int f = def[0]->reg.file;

   /* s = occupancy bits needed for all n values, m = contiguous mask */
   s = n << (nv_type_order(def[0]->reg.type) - 1);
   m = (1 << s) - 1;

   id = set->last[f];

   for (i = 0; i * 32 < set->last[f]; ++i) {
      if (set->bits[f][i] == 0xffffffff)
         continue; /* word fully occupied */

      /* search this word for an s-aligned free slot */
      for (id = 0; id < 32; id += s)
         if (!(set->bits[f][i] & (m << id)))
            break;
      if (id < 32)
         break;
   }
   if (i * 32 + id > set->last[f])
      return FALSE; /* file exhausted */

   set->bits[f][i] |= m << id;

   id += i * 32;

   set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);

   /* convert occupancy-bit index back to a register id */
   id >>= nv_type_order(def[0]->reg.type) - 1;

   /* only values that still own a live interval receive an id */
   for (i = 0; i < n; ++i)
      if (def[i]->livei)
         def[i]->reg.id = id++;

   return TRUE;
}
270
/* Mark the registers of @val as occupied in @set; no-op for values
 * without an assigned register.
 */
static INLINE void
reg_occupy(struct register_set *set, struct nv_value *val)
{
   int s, id = val->reg.id, f = val->reg.file;
   uint m;

   if (id < 0) /* not assigned to a register */
      return;
   /* scale the register id to occupancy-bit granularity */
   s = nv_type_order(val->reg.type) - 1;
   id <<= s;
   m = (1 << (1 << s)) - 1; /* one set bit per occupancy unit */

   assert(s >= 0); /* XXX: remove me */

   set->bits[f][id / 32] |= m << (id % 32);

   if (set->pc->max_reg[f] < id)
      set->pc->max_reg[f] = id;
}
290
291 static INLINE void
292 reg_release(struct register_set *set, struct nv_value *val)
293 {
294 int s, id = val->reg.id, f = val->reg.file;
295 uint m;
296
297 if (id < 0)
298 return;
299
300 s = nv_type_order(val->reg.type) - 1;
301 id <<= s;
302 m = (1 << (1 << s)) - 1;
303
304 set->bits[f][id / 32] &= ~(m << (id % 32));
305 }
306
/* Check whether values @a and @b may be coalesced into one register.
 * They must have compatible file/size; if one of them already has a
 * register assigned, @b must not interfere with any other value that
 * shares that register.
 */
static INLINE boolean
join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   int i;
   struct nv_value *val;

   if (a->reg.file != b->reg.file ||
       nv_type_sizeof(a->reg.type) != nv_type_sizeof(b->reg.type))
      return FALSE;

   if (a->join->reg.id == b->join->reg.id)
      return TRUE;

   /* either a or b or both have been assigned */

   if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
      return FALSE;
   else
   if (b->join->reg.id >= 0) {
      /* swap so that the already-assigned value is @a */
      val = a;
      a = b;
      b = val;
   }

   /* reject if any value sharing @a's register overlaps @b's interval */
   for (i = 0; i < ctx->pc->num_values; ++i) {
      val = &ctx->pc->values[i];

      if (val->join->reg.id != a->join->reg.id)
         continue;
      if (val->join != a->join && livei_have_overlap(val->join, b->join))
         return FALSE;
   }
   return TRUE;
}
341
/* Unconditionally coalesce @b into @a: propagate an already-assigned
 * register id, merge live intervals, and redirect every value whose
 * representative was @b's to @a's.
 */
static INLINE void
do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   int j;
   struct nv_value *bjoin = b->join;

   if (b->join->reg.id >= 0)
      a->join->reg.id = b->join->reg.id;

   livei_unify(a->join, b->join);

#ifdef NV50_RA_DEBUG_JOIN
   debug_printf("joining %i to %i\n", b->n, a->n);
#endif

   /* make a->join the new representative */
   for (j = 0; j < ctx->pc->num_values; ++j)
      if (ctx->pc->values[j].join == bjoin)
         ctx->pc->values[j].join = a->join;

   assert(b->join == a->join);
}
364
/* Coalesce @b into @a if sharing a register is allowed and their live
 * intervals do not overlap.
 *
 * @return: TRUE if the values were joined
 */
static INLINE boolean
try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   if (!join_allowed(ctx, a, b)) {
#ifdef NV50_RA_DEBUG_JOIN
      debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
#endif
      return FALSE;
   }
   if (livei_have_overlap(a->join, b->join)) {
#ifdef NV50_RA_DEBUG_JOIN
      debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
      livei_print(a);
      livei_print(b);
#endif
      return FALSE;
   }

   do_join_values(ctx, a, b);

   return TRUE;
}
387
388 static void
389 join_values_nofail(struct nv_pc_pass *ctx,
390 struct nv_value *a, struct nv_value *b, boolean type_only)
391 {
392 if (type_only) {
393 assert(join_allowed(ctx, a, b));
394 do_join_values(ctx, a, b);
395 } else {
396 boolean ok = try_join_values(ctx, a, b);
397 if (!ok) {
398 NOUVEAU_ERR("failed to coalesce values\n");
399 }
400 }
401 }
402
403 static INLINE boolean
404 need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
405 {
406 int i = 0, n = 0;
407
408 for (; i < 2; ++i)
409 if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))
410 ++n;
411
412 return (b->num_in > 1) && (n == 2);
413 }
414
/* Look for the @phi's operand whose definition reaches @b.
 * Among all operands defined in blocks that reach @b, the one defined
 * "last" (reachable from the others) is chosen; the final check rejects
 * candidates defined before the PHI's own block when @b is reached from
 * there (back-edges are ignored by nvbb_reachable_by).
 *
 * NOTE(review): @srcj is only read once j >= 0, which guarantees it was
 * initialized. The loop bound of 6 should match the size of the insn's
 * src[] array -- other loops in this file bound it by 4 or 5; confirm
 * against nv50_pc.h.
 *
 * @return: operand index, or -1 if no operand qualifies
 */
static int
phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
                struct nv_basic_block *tb)
{
   struct nv_ref *srci, *srcj;
   int i, j;

   for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
      srci = phi->src[i];
      /* if already replaced, check with original source first */
      if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV)
         srci = srci->value->insn->src[0];
      if (!nvbb_reachable_by(b, srci->value->insn->bb, NULL))
         continue;
      /* NOTE: back-edges are ignored by the reachable-by check */
      if (j < 0 || !nvbb_reachable_by(srcj->value->insn->bb,
                                      srci->value->insn->bb, NULL)) {
         j = i;
         srcj = srci;
      }
   }
   if (j >= 0 && nvbb_reachable_by(b, phi->def[0]->insn->bb, NULL))
      if (!nvbb_reachable_by(srcj->value->insn->bb,
                             phi->def[0]->insn->bb, NULL))
         j = -1;
   return j;
}
443
/* For each operand of each PHI in b, generate a new value by inserting a MOV
 * at the end of the block it is coming from and replace the operand with its
 * result. This eliminates liveness conflicts and enables us to let values be
 * copied to the right register if such a conflict exists nonetheless.
 *
 * These MOVs are also crucial in making sure the live intervals of phi srces
 * are extended until the end of the loop, since they are not included in the
 * live-in sets.
 */
static int
pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i, *ni;
   struct nv_value *val;
   struct nv_basic_block *p, *pn;
   int n, j;

   b->pass_seq = ctx->pc->pass_seq;

   for (n = 0; n < b->num_in; ++n) {
      p = pn = b->in[n];
      assert(p);

      /* split the critical edge: route p -> b through a new empty block */
      if (need_new_else_block(b, p)) {
         pn = new_basic_block(ctx->pc);

         if (p->out[0] == b)
            p->out[0] = pn;
         else
            p->out[1] = pn;

         if (p->exit->target == b) /* target to new else-block */
            p->exit->target = pn;

         b->in[n] = pn;

         pn->out[0] = b;
         pn->in[0] = p;
         pn->num_in = 1;
      }
      ctx->pc->current_block = pn;

      for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
         j = phi_opnd_for_bb(i, p, b);

         if (j < 0) {
            /* no operand's def reaches this predecessor: copy the PHI
             * result itself (value loops back around)
             */
            val = i->def[0];
         } else {
            val = i->src[j]->value;
            if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) {
               j = -1;
               /* use original value, we already encountered & replaced it */
               val = val->insn->src[0]->value;
            }
         }
         if (j < 0) /* need an additional source ? */
            for (j = 0; j < 5 && i->src[j] && i->src[j]->value != val; ++j);
         assert(j < 5);

         ni = new_instruction(ctx->pc, NV_OP_MOV);

         /* TODO: insert instruction at correct position in the first place */
         if (ni->prev && ni->prev->target)
            nv_nvi_permute(ni->prev, ni);

         ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
         ni->def[0]->insn = ni;
         ni->src[0] = new_ref(ctx->pc, val);

         nv_reference(ctx->pc, &i->src[j], ni->def[0]);

         /* mark the operand as already replaced by a MOV result */
         i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV;
      }

      if (pn != p && pn->exit) {
         assert(!b->in[!n]->exit || b->in[!n]->exit->is_terminator);
         /* insert terminator (branch to ENDIF) in new else block */
         ctx->pc->current_block = pn;
         ni = new_instruction(ctx->pc, NV_OP_BRA);
         ni->target = b;
         ni->is_terminator = 1;
      }
   }

   /* recurse into not-yet-visited successors */
   for (j = 0; j < 2; ++j)
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
         pass_generate_phi_movs(ctx, b->out[j]);

   return 0;
}
534
535 #define JOIN_MASK_PHI (1 << 0)
536 #define JOIN_MASK_SELECT (1 << 1)
537 #define JOIN_MASK_MOV (1 << 2)
538 #define JOIN_MASK_TEX (1 << 3)
539
/* Coalesce the sources of copy-like instructions with their destination
 * where possible; @mask selects which instruction kinds (JOIN_MASK_*)
 * are processed in this invocation.
 */
static int
pass_join_values(struct nv_pc_pass *ctx, unsigned mask)
{
   int c, n;

   for (n = 0; n < ctx->num_insns; ++n) {
      struct nv_instruction *nvi, *i = ctx->insns[n];

      switch (i->opcode) {
      case NV_OP_PHI:
         if (!(mask & JOIN_MASK_PHI))
            break;
         for (c = 0; c < 5 && i->src[c]; ++c)
            join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE);
         break;
      case NV_OP_MOV:
         if (!(mask & JOIN_MASK_MOV))
            break;
         /* don't join with sources produced by vector ops */
         nvi = i->src[0]->value->join->insn;
         if (nvi && !nv_is_vector_op(nvi->opcode))
            try_join_values(ctx, i->def[0], i->src[0]->value);
         break;
      case NV_OP_SELECT:
         if (!(mask & JOIN_MASK_SELECT))
            break;
         for (c = 0; c < 5 && i->src[c]; ++c)
            join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE);
         break;
      case NV_OP_TEX:
      case NV_OP_TXB:
      case NV_OP_TXL:
      case NV_OP_TXQ:
         if (!(mask & JOIN_MASK_TEX))
            break;
         /* This should work without conflicts because we always generate
          * extra MOVs for the sources of a TEX.
          */
         for (c = 0; c < 4 && i->src[c]; ++c)
            join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE);
         break;
      default:
         break;
      }
   }
   return 0;
}
586
/* Order the instructions so that live intervals can be expressed in numbers. */
static void
pass_order_instructions(void *priv, struct nv_basic_block *b)
{
   struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
   struct nv_instruction *i;

   b->pass_seq = ctx->pc->pass_seq;

   assert(!b->exit || !b->exit->next);
   /* starting at b->phi appears to cover the whole block: PHIs and the
    * regular instructions share one next-chain (pass_build_live_sets
    * sets phi = entry when there are no PHIs) -- confirm in nv50_pc.h
    */
   for (i = b->phi; i; i = i->next) {
      i->serial = ctx->num_insns;
      ctx->insns[ctx->num_insns++] = i;
   }
}
602
/* Debug helper: dump the values in @b's live-in bit set.
 * Compiles to a no-op unless NV50_RA_DEBUG_LIVE_SETS is defined.
 */
static void
bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
{
#ifdef NV50_RA_DEBUG_LIVE_SETS
   int j;
   struct nv_value *val;

   debug_printf("LIVE-INs of BB:%i: ", b->id);

   for (j = 0; j < pc->num_values; ++j) {
      if (!(b->live_set[j / 32] & (1 << (j % 32))))
         continue;
      val = &pc->values[j];
      if (!val->insn) /* skip values that never got a definition */
         continue;
      debug_printf("%i ", val->n);
   }
   debug_printf("\n");
#endif
}
623
624 static INLINE void
625 live_set_add(struct nv_basic_block *b, struct nv_value *val)
626 {
627 if (!val->insn) /* don't add non-def values */
628 return;
629 b->live_set[val->n / 32] |= 1 << (val->n % 32);
630 }
631
632 static INLINE void
633 live_set_rem(struct nv_basic_block *b, struct nv_value *val)
634 {
635 b->live_set[val->n / 32] &= ~(1 << (val->n % 32));
636 }
637
638 static INLINE boolean
639 live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
640 {
641 int n = ref->value->n;
642 return b->live_set[n / 32] & (1 << (n % 32));
643 }
644
/* The live set of a block contains those values that are live immediately
 * before the beginning of the block, so do a backwards scan.
 */
static int
pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i;
   int j, n, ret = 0;

   if (b->pass_seq >= ctx->pc->pass_seq) /* already visited this pass */
      return 0;
   b->pass_seq = ctx->pc->pass_seq;

   /* slight hack for undecidedness: set phi = entry if it's undefined */
   if (!b->phi)
      b->phi = b->entry;

   /* process successors first, then take the union of their live-ins */
   for (n = 0; n < 2; ++n) {
      if (!b->out[n] || b->out[n] == b)
         continue;
      ret = pass_build_live_sets(ctx, b->out[n]);
      if (ret)
         return ret;

      if (n == 0) {
         for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
            b->live_set[j] = b->out[n]->live_set[j];
      } else {
         for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
            b->live_set[j] |= b->out[n]->live_set[j];
      }
   }

   if (!b->entry)
      return 0;

   bb_live_set_print(ctx->pc, b);

   /* backwards scan over the block: defs kill liveness, uses create it */
   for (i = b->exit; i != b->entry->prev; i = i->prev) {
      for (j = 0; j < 4; j++) {
         if (!i->def[j])
            break;
         live_set_rem(b, i->def[j]);
      }
      for (j = 0; j < 4; j++) {
         if (!i->src[j])
            break;
         live_set_add(b, i->src[j]->value);
      }
      if (i->src[4])
         live_set_add(b, i->src[4]->value);
      if (i->flags_def)
         live_set_rem(b, i->flags_def);
      if (i->flags_src)
         live_set_add(b, i->flags_src->value);
   }
   /* PHI results are not live before the block, their sources are */
   for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
      live_set_rem(b, i->def[0]);

   bb_live_set_print(ctx->pc, b);

   return 0;
}
708
709 static void collect_live_values(struct nv_basic_block *b, const int n)
710 {
711 int i;
712
713 if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) {
714 if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
715 for (i = 0; i < n; ++i)
716 b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
717 } else {
718 memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
719 }
720 } else
721 if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
722 memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
723 } else {
724 memset(b->live_set, 0, n * sizeof(uint32_t));
725 }
726 }
727
/* NOTE: the live intervals of phi functions start at the first non-phi insn. */
static int
pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i, *i_stop;
   int j, s;
   const int n = (ctx->pc->num_values + 31) / 32;

   /* verify that first block does not have live-in values */
   if (b->num_in == 0)
      for (j = 0; j < n; ++j)
         assert(b->live_set[j] == 0);

   collect_live_values(b, n);

   /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
   for (j = 0; j < 2; ++j) {
      if (!b->out[j] || !b->out[j]->phi)
         continue;
      for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
         live_set_rem(b, i->def[0]);

         for (s = 0; s < 4; ++s) {
            if (!i->src[s])
               break;
            assert(i->src[s]->value->insn);
            /* only a source whose def reaches us through this edge is a
             * genuine live-out of @b
             */
            if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
               live_set_add(b, i->src[s]->value);
            else
               live_set_rem(b, i->src[s]->value);
         }
      }
   }

   /* remaining live-outs are live until the end */
   if (b->exit) {
      for (j = 0; j < ctx->pc->num_values; ++j) {
         if (!(b->live_set[j / 32] & (1 << (j % 32))))
            continue;
         add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
#ifdef NV50_RA_DEBUG_LIVEI
         debug_printf("adding range for live value %i: ", j);
         livei_print(&ctx->pc->values[j]);
#endif

      }
   }

   i_stop = b->entry ? b->entry->prev : NULL;

   /* don't have to include phi functions here (will have 0 live range) */
   for (i = b->exit; i != i_stop; i = i->prev) {
      assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
      /* defs end liveness at this point */
      for (j = 0; j < 4; ++j) {
         if (i->def[j])
            live_set_rem(b, i->def[j]);
      }
      if (i->flags_def)
         live_set_rem(b, i->flags_def);

      /* a source first seen in the backwards scan is live up to here */
      for (j = 0; j < 5; ++j) {
         if (i->src[j] && !live_set_test(b, i->src[j])) {
            live_set_add(b, i->src[j]->value);
            add_range(i->src[j]->value, b, i->serial);
#ifdef NV50_RA_DEBUG_LIVEI
            debug_printf("adding range for source %i (ends living): ",
                         i->src[j]->value->n);
            livei_print(i->src[j]->value);
#endif
         }
      }
      if (i->flags_src && !live_set_test(b, i->flags_src)) {
         live_set_add(b, i->flags_src->value);
         add_range(i->flags_src->value, b, i->serial);
#ifdef NV50_RA_DEBUG_LIVEI
         debug_printf("adding range for source %i (ends living): ",
                      i->flags_src->value->n);
         livei_print(i->flags_src->value);
#endif
      }
   }

   b->pass_seq = ctx->pc->pass_seq;

   /* recurse into not-yet-visited successors */
   if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
      pass_build_intervals(ctx, b->out[0]);

   if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
      pass_build_intervals(ctx, b->out[1]);

   return 0;
}
820
821 static INLINE void
822 nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set)
823 {
824 memset(set, 0, sizeof(*set));
825
826 set->last[NV_FILE_GPR] = 255;
827 set->last[NV_FILE_OUT] = 127;
828 set->last[NV_FILE_FLAGS] = 4;
829 set->last[NV_FILE_ADDR] = 4;
830
831 set->pc = pc;
832 }
833
/* Insert @nval into the circular list headed by @list, keeping elements
 * sorted by ascending live interval start; on ties the new value comes
 * after existing ones (scan from the tail).
 */
static void
insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
{
   struct nv_value *elem;

   /* walk backwards past all elements that begin after @nval */
   for (elem = list->prev;
        elem != list && elem->livei->bgn > nval->livei->bgn;
        elem = elem->prev);
   /* now elem begins before or at the same time as val */

   /* splice @nval in right after @elem */
   nval->prev = elem;
   nval->next = elem->next;
   elem->next->prev = nval;
   elem->next = nval;
}
849
/* Collect all interval-owning values (the representatives of joined
 * groups) defined in the program into list @head, ordered by ascending
 * live interval start. With @assigned_only, only values that already
 * hold a register id are collected.
 */
static void
collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head,
                        boolean assigned_only)
{
   struct nv_value *val;
   int k, n;

   make_empty_list(head);

   for (n = 0; n < ctx->num_insns; ++n) {
      struct nv_instruction *i = ctx->insns[n];

      /* for joined values, only the representative will have livei != NULL */
      for (k = 0; k < 4; ++k) {
         if (i->def[k] && i->def[k]->livei)
            if (!assigned_only || i->def[k]->reg.id >= 0)
               insert_ordered_tail(head, i->def[k]);
      }
      if (i->flags_def && i->flags_def->livei)
         if (!assigned_only || i->flags_def->reg.id >= 0)
            insert_ordered_tail(head, i->flags_def);
   }

   /* sanity check: the list must really be sorted by interval start */
   for (val = head->next; val != head->prev; val = val->next) {
      assert(val->join == val);
      assert(val->livei->bgn <= val->next->livei->bgn);
   }
}
878
/* Linear-scan register allocation over values sorted by interval start.
 * Values move between the active list (occupying their register at the
 * current position), the inactive list (inside a lifetime hole) and the
 * handled list. Values pre-assigned by the constrained pass keep their
 * register; no spilling is implemented (failure aborts).
 */
static int
pass_linear_scan(struct nv_pc_pass *ctx, int iter)
{
   struct register_set f, free;
   struct nv_value *cur, *val, *tmp[2];
   struct nv_value active, inactive, handled, unhandled;

   make_empty_list(&active);
   make_empty_list(&inactive);
   make_empty_list(&handled);

   nv50_ctor_register_set(ctx->pc, &free);

   collect_register_values(ctx, &unhandled, FALSE);

   foreach_s(cur, tmp[0], &unhandled) {
      remove_from_list(cur);

      /* expire values whose interval ended, suspend those in a hole */
      foreach_s(val, tmp[1], &active) {
         if (livei_end(val) <= cur->livei->bgn) {
            reg_release(&free, val);
            move_to_head(&handled, val);
         } else
         if (!livei_contains(val, cur->livei->bgn)) {
            reg_release(&free, val);
            move_to_head(&inactive, val);
         }
      }

      /* re-activate suspended values that are live again */
      foreach_s(val, tmp[1], &inactive) {
         if (livei_end(val) <= cur->livei->bgn)
            move_to_head(&handled, val);
         else
         if (livei_contains(val, cur->livei->bgn)) {
            reg_occupy(&free, val);
            move_to_head(&active, val);
         }
      }

      f = free;

      /* registers of inactive or pre-assigned values overlapping @cur
       * must not be handed out either
       */
      foreach(val, &inactive)
         if (livei_have_overlap(val, cur))
            reg_occupy(&f, val);

      foreach(val, &unhandled)
         if (val->reg.id >= 0 && livei_have_overlap(val, cur))
            reg_occupy(&f, val);

      if (cur->reg.id < 0) {
         boolean mem = !reg_assign(&f, &cur, 1);

         if (mem) {
            /* no spill support: running out of registers is fatal */
            NOUVEAU_ERR("out of registers\n");
            abort();
         }
      }
      insert_at_head(&active, cur);
      reg_occupy(&free, cur);
   }

   return 0;
}
942
/* Allocate values defined by instructions such as TEX, which have to be
 * assigned to consecutive registers.
 * Linear scan doesn't really work here since the values can have different
 * live intervals.
 */
static int
pass_allocate_constrained_values(struct nv_pc_pass *ctx)
{
   struct nv_value regvals, *val;
   struct nv_instruction *i;
   struct nv_value *defs[4];
   struct register_set regs[4];
   int n, vsize, c;
   uint32_t mask;
   boolean mem;

   collect_register_values(ctx, &regvals, TRUE);

   for (n = 0; n < ctx->num_insns; ++n) {
      i = ctx->insns[n];
      vsize = nvi_vector_size(i);
      if (!(vsize > 1))
         continue; /* not a vector-result instruction */
      assert(vsize <= 4);
      for (c = 0; c < vsize; ++c)
         defs[c] = i->def[c]->join;

      /* if one component is allocated, all of them must already be */
      if (defs[0]->reg.id >= 0) {
         for (c = 1; c < vsize; ++c)
            assert(defs[c]->reg.id >= 0);
         continue;
      }

      /* Compute registers available for this "vector" of consecutive registers.
       * Each value (component) has its own independent live interval.
       */
      for (c = 0; c < vsize; ++c) {
         nv50_ctor_register_set(ctx->pc, &regs[c]);

         foreach(val, &regvals) {
            if (val->reg.id >= 0 && livei_have_overlap(val, defs[c]))
               reg_occupy(&regs[c], val);
         }
         /* Only 32 bit GPRs will be allocated here, but register set
          * granularity for GPRs is 16 bit.
          */
         mask = 0x03030303;
         if (vsize == 2) /* granularity is 2 and not 4 */
            mask |= 0x03030303 << 4;
         mask_register_set(&regs[c], 0, mask << (c * 2));

         if (defs[c]->livei)
            insert_ordered_tail(&regvals, defs[c]);
      }
      /* the slot must be simultaneously free for every component */
      for (c = 1; c < vsize; ++c)
         intersect_register_sets(&regs[0], &regs[0], &regs[c]);

      mem = !reg_assign(&regs[0], &defs[0], vsize);

      if (mem) {
         /* no spill support: running out of registers is fatal */
         NOUVEAU_ERR("out of registers\n");
         abort();
      }
   }
   return 0;
}
1009
1010 static int
1011 nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
1012 {
1013 struct nv_pc_pass *ctx;
1014 int i, ret;
1015
1016 NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - entering\n");
1017
1018 ctx = CALLOC_STRUCT(nv_pc_pass);
1019 if (!ctx)
1020 return -1;
1021 ctx->pc = pc;
1022
1023 ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
1024 if (!ctx->insns) {
1025 FREE(ctx);
1026 return -1;
1027 }
1028
1029 pc->pass_seq++;
1030 ret = pass_generate_phi_movs(ctx, root);
1031 assert(!ret);
1032
1033 for (i = 0; i < pc->loop_nesting_bound; ++i) {
1034 pc->pass_seq++;
1035 ret = pass_build_live_sets(ctx, root);
1036 assert(!ret && "live sets");
1037 if (ret) {
1038 NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
1039 goto out;
1040 }
1041 }
1042
1043 pc->pass_seq++;
1044 nv_pc_pass_in_order(root, pass_order_instructions, ctx);
1045
1046 pc->pass_seq++;
1047 ret = pass_build_intervals(ctx, root);
1048 assert(!ret && "build intervals");
1049 if (ret) {
1050 NOUVEAU_ERR("failed to build live intervals\n");
1051 goto out;
1052 }
1053
1054 #ifdef NV50_RA_DEBUG_LIVEI
1055 for (i = 0; i < pc->num_values; ++i)
1056 livei_print(&pc->values[i]);
1057 #endif
1058
1059 ret = pass_join_values(ctx, JOIN_MASK_PHI);
1060 if (ret)
1061 goto out;
1062 ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_TEX);
1063 if (ret)
1064 goto out;
1065 ret = pass_join_values(ctx, JOIN_MASK_MOV);
1066 if (ret)
1067 goto out;
1068 ret = pass_allocate_constrained_values(ctx);
1069 if (ret)
1070 goto out;
1071 ret = pass_linear_scan(ctx, 1);
1072 if (ret)
1073 goto out;
1074
1075 for (i = 0; i < pc->num_values; ++i)
1076 livei_release(&pc->values[i]);
1077
1078 NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - leaving\n");
1079
1080 out:
1081 FREE(ctx->insns);
1082 FREE(ctx);
1083 return ret;
1084 }
1085
1086 int
1087 nv_pc_exec_pass1(struct nv_pc *pc)
1088 {
1089 int i, ret;
1090
1091 for (i = 0; i < pc->num_subroutines + 1; ++i)
1092 if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
1093 return ret;
1094 return 0;
1095 }