nv50: must join SELECT inputs before MOV inputs
[mesa.git] / src / gallium / drivers / nv50 / nv50_pc_regalloc.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 /* #define NV50PC_DEBUG */
24
25 /* #define NV50_RA_DEBUG_LIVEI */
26 /* #define NV50_RA_DEBUG_LIVE_SETS */
27 /* #define NV50_RA_DEBUG_JOIN */
28
29 #include "nv50_context.h"
30 #include "nv50_pc.h"
31
32 #include "util/u_simple_list.h"
33
34 #define NUM_REGISTER_FILES 4
35
36 struct register_set {
37 struct nv_pc *pc;
38
39 uint32_t last[NUM_REGISTER_FILES];
40 uint32_t bits[NUM_REGISTER_FILES][8];
41 };
42
43 struct nv_pc_pass {
44 struct nv_pc *pc;
45
46 struct nv_instruction **insns;
47 int num_insns;
48
49 uint pass_seq;
50 };
51
52 static void
53 ranges_coalesce(struct nv_range *range)
54 {
55 while (range->next && range->end >= range->next->bgn) {
56 struct nv_range *rnn = range->next->next;
57 assert(range->bgn <= range->next->bgn);
58 range->end = MAX2(range->end, range->next->end);
59 FREE(range->next);
60 range->next = rnn;
61 }
62 }
63
64 static boolean
65 add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
66 {
67 struct nv_range *range, **nextp = &val->livei;
68
69 for (range = val->livei; range; range = range->next) {
70 if (end < range->bgn)
71 break; /* insert before */
72
73 if (bgn > range->end) {
74 nextp = &range->next;
75 continue; /* insert after */
76 }
77
78 /* overlap */
79 if (bgn < range->bgn) {
80 range->bgn = bgn;
81 if (end > range->end)
82 range->end = end;
83 ranges_coalesce(range);
84 return TRUE;
85 }
86 if (end > range->end) {
87 range->end = end;
88 ranges_coalesce(range);
89 return TRUE;
90 }
91 assert(bgn >= range->bgn);
92 assert(end <= range->end);
93 return TRUE;
94 }
95
96 if (!new_range)
97 new_range = CALLOC_STRUCT(nv_range);
98
99 new_range->bgn = bgn;
100 new_range->end = end;
101 new_range->next = range;
102 *(nextp) = new_range;
103 return FALSE;
104 }
105
106 static void
107 add_range(struct nv_value *val, struct nv_basic_block *b, int end)
108 {
109 int bgn;
110
111 if (!val->insn) /* ignore non-def values */
112 return;
113 assert(b->entry->serial <= b->exit->serial);
114 assert(b->phi->serial <= end);
115 assert(b->exit->serial + 1 >= end);
116
117 bgn = val->insn->serial;
118 if (bgn < b->entry->serial || bgn > b->exit->serial)
119 bgn = b->entry->serial;
120
121 assert(bgn <= end);
122
123 add_range_ex(val, bgn, end, NULL);
124 }
125
126 #if defined(NV50_RA_DEBUG_JOIN) || defined(NV50_RA_DEBUG_LIVEI)
127 static void
128 livei_print(struct nv_value *a)
129 {
130 struct nv_range *r = a->livei;
131
132 debug_printf("livei %i: ", a->n);
133 while (r) {
134 debug_printf("[%i, %i) ", r->bgn, r->end);
135 r = r->next;
136 }
137 debug_printf("\n");
138 }
139 #endif
140
141 static void
142 livei_unify(struct nv_value *dst, struct nv_value *src)
143 {
144 struct nv_range *range, *next;
145
146 for (range = src->livei; range; range = next) {
147 next = range->next;
148 if (add_range_ex(dst, range->bgn, range->end, range))
149 FREE(range);
150 }
151 src->livei = NULL;
152 }
153
154 static void
155 livei_release(struct nv_value *val)
156 {
157 struct nv_range *range, *next;
158
159 for (range = val->livei; range; range = next) {
160 next = range->next;
161 FREE(range);
162 }
163 }
164
165 static boolean
166 livei_have_overlap(struct nv_value *a, struct nv_value *b)
167 {
168 struct nv_range *r_a, *r_b;
169
170 for (r_a = a->livei; r_a; r_a = r_a->next) {
171 for (r_b = b->livei; r_b; r_b = r_b->next) {
172 if (r_b->bgn < r_a->end &&
173 r_b->end > r_a->bgn)
174 return TRUE;
175 }
176 }
177 return FALSE;
178 }
179
180 static int
181 livei_end(struct nv_value *a)
182 {
183 struct nv_range *r = a->livei;
184
185 assert(r);
186 while (r->next)
187 r = r->next;
188 return r->end;
189 }
190
191 static boolean
192 livei_contains(struct nv_value *a, int pos)
193 {
194 struct nv_range *r;
195
196 for (r = a->livei; r && r->bgn <= pos; r = r->next)
197 if (r->end > pos)
198 return TRUE;
199 return FALSE;
200 }
201
202 static boolean
203 reg_assign(struct register_set *set, struct nv_value **def, int n)
204 {
205 int i, id, s;
206 uint m;
207 int f = def[0]->reg.file;
208
209 s = n << (nv_type_order(def[0]->reg.type) - 1);
210 m = (1 << s) - 1;
211
212 id = set->last[f];
213
214 for (i = 0; i * 32 < set->last[f]; ++i) {
215 if (set->bits[f][i] == 0xffffffff)
216 continue;
217
218 for (id = 0; id < 32; id += s)
219 if (!(set->bits[f][i] & (m << id)))
220 break;
221 if (id < 32)
222 break;
223 }
224 if (i * 32 + id > set->last[f])
225 return FALSE;
226
227 set->bits[f][i] |= m << id;
228
229 id += i * 32;
230
231 set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
232
233 id >>= nv_type_order(def[0]->reg.type) - 1;
234
235 for (i = 0; i < n; ++i)
236 if (def[i]->livei)
237 def[i]->reg.id = id++;
238
239 return TRUE;
240 }
241
242 static INLINE void
243 reg_occupy(struct register_set *set, struct nv_value *val)
244 {
245 int s, id = val->reg.id, f = val->reg.file;
246 uint m;
247
248 if (id < 0)
249 return;
250 s = nv_type_order(val->reg.type) - 1;
251 id <<= s;
252 m = (1 << (1 << s)) - 1;
253
254 set->bits[f][id / 32] |= m << (id % 32);
255
256 if (set->pc->max_reg[f] < id)
257 set->pc->max_reg[f] = id;
258 }
259
260 static INLINE void
261 reg_release(struct register_set *set, struct nv_value *val)
262 {
263 int s, id = val->reg.id, f = val->reg.file;
264 uint m;
265
266 if (id < 0)
267 return;
268
269 s = nv_type_order(val->reg.type) - 1;
270 id <<= s;
271 m = (1 << (1 << s)) - 1;
272
273 set->bits[f][id / 32] &= ~(m << (id % 32));
274 }
275
276 static INLINE boolean
277 join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
278 {
279 int i;
280 struct nv_value *val;
281
282 if (a->reg.file != b->reg.file ||
283 nv_type_sizeof(a->reg.type) != nv_type_sizeof(b->reg.type))
284 return FALSE;
285
286 if (a->join->reg.id == b->join->reg.id)
287 return TRUE;
288
289 #if 1
290 /* either a or b or both have been assigned */
291
292 if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
293 return FALSE;
294 else
295 if (b->join->reg.id >= 0) {
296 if (a->join->reg.id >= 0)
297 return FALSE;
298 val = a;
299 a = b;
300 b = val;
301 }
302
303 for (i = 0; i < ctx->pc->num_values; ++i) {
304 val = &ctx->pc->values[i];
305
306 if (val->join->reg.id != a->join->reg.id)
307 continue;
308 if (val->join != a->join && livei_have_overlap(val->join, b->join))
309 return FALSE;
310 }
311 return TRUE;
312 #endif
313 return FALSE;
314 }
315
316 static INLINE void
317 do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
318 {
319 int j;
320 struct nv_value *bjoin = b->join;
321
322 if (b->join->reg.id >= 0)
323 a->join->reg.id = b->join->reg.id;
324
325 livei_unify(a->join, b->join);
326
327 #ifdef NV50_RA_DEBUG_JOIN
328 debug_printf("joining %i to %i\n", b->n, a->n);
329 #endif
330
331 /* make a->join the new representative */
332 for (j = 0; j < ctx->pc->num_values; ++j)
333 if (ctx->pc->values[j].join == bjoin)
334 ctx->pc->values[j].join = a->join;
335
336 assert(b->join == a->join);
337 }
338
339 static INLINE void
340 try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
341 {
342 if (!join_allowed(ctx, a, b)) {
343 #ifdef NV50_RA_DEBUG_JOIN
344 debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
345 #endif
346 return;
347 }
348 if (livei_have_overlap(a->join, b->join)) {
349 #ifdef NV50_RA_DEBUG_JOIN
350 debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
351 livei_print(a);
352 livei_print(b);
353 #endif
354 return;
355 }
356
357 do_join_values(ctx, a, b);
358 }
359
360 static INLINE boolean
361 need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
362 {
363 int i = 0, n = 0;
364
365 for (; i < 2; ++i)
366 if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))
367 ++n;
368
369 return (b->num_in > 1) && (n == 2);
370 }
371
372 static int
373 phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
374 struct nv_basic_block *tb)
375 {
376 int i, j;
377
378 for (j = -1, i = 0; i < 4 && phi->src[i]; ++i) {
379 if (!nvbb_reachable_by(b, phi->src[i]->value->insn->bb, tb))
380 continue;
381 /* NOTE: back-edges are ignored by the reachable-by check */
382 if (j < 0 || !nvbb_reachable_by(phi->src[j]->value->insn->bb,
383 phi->src[i]->value->insn->bb, tb))
384 j = i;
385 }
386 return j;
387 }
388
389 /* For each operand of each PHI in b, generate a new value by inserting a MOV
390 * at the end of the block it is coming from and replace the operand with its
391 * result. This eliminates liveness conflicts and enables us to let values be
392 * copied to the right register if such a conflict exists nonetheless.
393 *
394 * These MOVs are also crucial in making sure the live intervals of phi srces
395 * are extended until the end of the loop, since they are not included in the
396 * live-in sets.
397 */
398 static int
399 pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
400 {
401 struct nv_instruction *i, *ni;
402 struct nv_value *val;
403 struct nv_basic_block *p, *pn;
404 int n, j;
405
406 b->pass_seq = ctx->pc->pass_seq;
407
408 for (n = 0; n < b->num_in; ++n) {
409 p = pn = b->in[n];
410 assert(p);
411
412 if (need_new_else_block(b, p)) {
413 pn = new_basic_block(ctx->pc);
414
415 if (p->out[0] == b)
416 p->out[0] = pn;
417 else
418 p->out[1] = pn;
419
420 if (p->exit->target == b) /* target to new else-block */
421 p->exit->target = pn;
422
423 b->in[n] = pn;
424
425 pn->out[0] = b;
426 pn->in[0] = p;
427 pn->num_in = 1;
428 }
429 ctx->pc->current_block = pn;
430
431 for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
432 if ((j = phi_opnd_for_bb(i, p, b)) < 0)
433 continue;
434 val = i->src[j]->value;
435
436 if (i->src[j]->flags) {
437 val = val->insn->src[0]->value;
438 while (j < 4 && i->src[j])
439 ++j;
440 assert(j < 4);
441 }
442
443 ni = new_instruction(ctx->pc, NV_OP_MOV);
444
445 /* TODO: insert instruction at correct position in the first place */
446 if (ni->prev && ni->prev->target)
447 nv_nvi_permute(ni->prev, ni);
448
449 ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
450 ni->def[0]->insn = ni;
451 ni->src[0] = new_ref(ctx->pc, val);
452
453 nv_reference(ctx->pc, &i->src[j], ni->def[0]);
454
455 i->src[j]->flags = 1;
456 }
457
458 if (pn != p && pn->exit) {
459 ctx->pc->current_block = b->in[n ? 0 : 1];
460 ni = new_instruction(ctx->pc, NV_OP_BRA);
461 ni->target = b;
462 ni->is_terminator = 1;
463 }
464 }
465
466 for (j = 0; j < 2; ++j)
467 if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
468 pass_generate_phi_movs(ctx, b->out[j]);
469
470 return 0;
471 }
472
473 static int
474 pass_join_values(struct nv_pc_pass *ctx, int iter)
475 {
476 int c, n;
477
478 for (n = 0; n < ctx->num_insns; ++n) {
479 struct nv_instruction *i = ctx->insns[n];
480
481 switch (i->opcode) {
482 case NV_OP_PHI:
483 if (iter != 2)
484 break;
485 for (c = 0; c < 4 && i->src[c]; ++c)
486 try_join_values(ctx, i->def[0], i->src[c]->value);
487 break;
488 case NV_OP_MOV:
489 if ((iter == 2) && i->src[0]->value->insn &&
490 !nv_is_vector_op(i->src[0]->value->join->insn->opcode))
491 try_join_values(ctx, i->def[0], i->src[0]->value);
492 break;
493 case NV_OP_SELECT:
494 if (iter != 1)
495 break;
496 for (c = 0; c < 4 && i->src[c]; ++c) {
497 assert(join_allowed(ctx, i->def[0], i->src[c]->value));
498 do_join_values(ctx, i->def[0], i->src[c]->value);
499 }
500 break;
501 case NV_OP_TEX:
502 case NV_OP_TXB:
503 case NV_OP_TXL:
504 case NV_OP_TXQ:
505 if (iter)
506 break;
507 for (c = 0; c < 4; ++c) {
508 if (!i->src[c])
509 break;
510 do_join_values(ctx, i->def[c], i->src[c]->value);
511 }
512 break;
513 default:
514 break;
515 }
516 }
517 return 0;
518 }
519
520 /* Order the instructions so that live intervals can be expressed in numbers. */
521 static void
522 pass_order_instructions(void *priv, struct nv_basic_block *b)
523 {
524 struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
525 struct nv_instruction *i;
526
527 b->pass_seq = ctx->pc->pass_seq;
528
529 assert(!b->exit || !b->exit->next);
530 for (i = b->phi; i; i = i->next) {
531 i->serial = ctx->num_insns;
532 ctx->insns[ctx->num_insns++] = i;
533 }
534 }
535
536 static void
537 bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
538 {
539 #ifdef NV50_RA_DEBUG_LIVE_SETS
540 int j;
541 struct nv_value *val;
542
543 debug_printf("LIVE-INs of BB:%i: ", b->id);
544
545 for (j = 0; j < pc->num_values; ++j) {
546 if (!(b->live_set[j / 32] & (1 << (j % 32))))
547 continue;
548 val = &pc->values[j];
549 if (!val->insn)
550 continue;
551 debug_printf("%i ", val->n);
552 }
553 debug_printf("\n");
554 #endif
555 }
556
557 static INLINE void
558 live_set_add(struct nv_basic_block *b, struct nv_value *val)
559 {
560 if (!val->insn) /* don't add non-def values */
561 return;
562 b->live_set[val->n / 32] |= 1 << (val->n % 32);
563 }
564
565 static INLINE void
566 live_set_rem(struct nv_basic_block *b, struct nv_value *val)
567 {
568 b->live_set[val->n / 32] &= ~(1 << (val->n % 32));
569 }
570
571 static INLINE boolean
572 live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
573 {
574 int n = ref->value->n;
575 return b->live_set[n / 32] & (1 << (n % 32));
576 }
577
578 /* The live set of a block contains those values that are live immediately
579 * before the beginning of the block, so do a backwards scan.
580 */
581 static int
582 pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
583 {
584 struct nv_instruction *i;
585 int j, n, ret = 0;
586
587 if (b->pass_seq >= ctx->pc->pass_seq)
588 return 0;
589 b->pass_seq = ctx->pc->pass_seq;
590
591 /* slight hack for undecidedness: set phi = entry if it's undefined */
592 if (!b->phi)
593 b->phi = b->entry;
594
595 for (n = 0; n < 2; ++n) {
596 if (!b->out[n] || b->out[n] == b)
597 continue;
598 ret = pass_build_live_sets(ctx, b->out[n]);
599 if (ret)
600 return ret;
601
602 if (n == 0) {
603 for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
604 b->live_set[j] = b->out[n]->live_set[j];
605 } else {
606 for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
607 b->live_set[j] |= b->out[n]->live_set[j];
608 }
609 }
610
611 if (!b->entry)
612 return 0;
613
614 bb_live_set_print(ctx->pc, b);
615
616 for (i = b->exit; i != b->entry->prev; i = i->prev) {
617 for (j = 0; j < 4; j++) {
618 if (!i->def[j])
619 break;
620 live_set_rem(b, i->def[j]);
621 }
622 for (j = 0; j < 4; j++) {
623 if (!i->src[j])
624 break;
625 live_set_add(b, i->src[j]->value);
626 }
627 if (i->src[4])
628 live_set_add(b, i->src[4]->value);
629 if (i->flags_def)
630 live_set_rem(b, i->flags_def);
631 if (i->flags_src)
632 live_set_add(b, i->flags_src->value);
633 }
634 for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
635 live_set_rem(b, i->def[0]);
636
637 bb_live_set_print(ctx->pc, b);
638
639 return 0;
640 }
641
642 static void collect_live_values(struct nv_basic_block *b, const int n)
643 {
644 int i;
645
646 if (b->out[0]) {
647 if (b->out[1]) { /* what to do about back-edges ? */
648 for (i = 0; i < n; ++i)
649 b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
650 } else {
651 memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
652 }
653 } else
654 if (b->out[1]) {
655 memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
656 } else {
657 memset(b->live_set, 0, n * sizeof(uint32_t));
658 }
659 }
660
661 /* NOTE: the live intervals of phi functions start at the first non-phi insn. */
662 static int
663 pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
664 {
665 struct nv_instruction *i, *i_stop;
666 int j, s;
667 const int n = (ctx->pc->num_values + 31) / 32;
668
669 /* verify that first block does not have live-in values */
670 if (b->num_in == 0)
671 for (j = 0; j < n; ++j)
672 assert(b->live_set[j] == 0);
673
674 collect_live_values(b, n);
675
676 /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
677 for (j = 0; j < 2; ++j) {
678 if (!b->out[j] || !b->out[j]->phi)
679 continue;
680 for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
681 live_set_rem(b, i->def[0]);
682
683 for (s = 0; s < 4; ++s) {
684 if (!i->src[s])
685 break;
686 assert(i->src[s]->value->insn);
687 if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
688 live_set_add(b, i->src[s]->value);
689 else
690 live_set_rem(b, i->src[s]->value);
691 }
692 }
693 }
694
695 /* remaining live-outs are live until the end */
696 if (b->exit) {
697 for (j = 0; j < ctx->pc->num_values; ++j) {
698 if (!(b->live_set[j / 32] & (1 << (j % 32))))
699 continue;
700 add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
701 #ifdef NV50_RA_DEBUG_LIVEI
702 debug_printf("adding range for live value %i: ", j);
703 livei_print(&ctx->pc->values[j]);
704 #endif
705
706 }
707 }
708
709 i_stop = b->entry ? b->entry->prev : NULL;
710
711 /* don't have to include phi functions here (will have 0 live range) */
712 for (i = b->exit; i != i_stop; i = i->prev) {
713 assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
714 for (j = 0; j < 4; ++j) {
715 if (i->def[j])
716 live_set_rem(b, i->def[j]);
717 }
718 if (i->flags_def)
719 live_set_rem(b, i->flags_def);
720
721 for (j = 0; j < 5; ++j) {
722 if (i->src[j] && !live_set_test(b, i->src[j])) {
723 live_set_add(b, i->src[j]->value);
724 add_range(i->src[j]->value, b, i->serial);
725 #ifdef NV50_RA_DEBUG_LIVEI
726 debug_printf("adding range for source %i (ends living): ",
727 i->src[j]->value->n);
728 livei_print(i->src[j]->value);
729 #endif
730 }
731 }
732 if (i->flags_src && !live_set_test(b, i->flags_src)) {
733 live_set_add(b, i->flags_src->value);
734 add_range(i->flags_src->value, b, i->serial);
735 #ifdef NV50_RA_DEBUG_LIVEI
736 debug_printf("adding range for source %i (ends living): ",
737 i->flags_src->value->n);
738 livei_print(i->flags_src->value);
739 #endif
740 }
741 }
742
743 b->pass_seq = ctx->pc->pass_seq;
744
745 if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
746 pass_build_intervals(ctx, b->out[0]);
747
748 if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
749 pass_build_intervals(ctx, b->out[1]);
750
751 return 0;
752 }
753
754 static INLINE void
755 nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set)
756 {
757 memset(set, 0, sizeof(*set));
758
759 set->last[NV_FILE_GPR] = 255;
760 set->last[NV_FILE_OUT] = 127;
761 set->last[NV_FILE_FLAGS] = 4;
762 set->last[NV_FILE_ADDR] = 4;
763
764 set->pc = pc;
765 }
766
767 static void
768 insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
769 {
770 struct nv_value *elem = list->prev;
771
772 for (elem = list->prev;
773 elem != list && elem->livei->bgn > nval->livei->bgn;
774 elem = elem->prev);
775 /* now elem begins before or at the same time as val */
776
777 nval->prev = elem;
778 nval->next = elem->next;
779 elem->next->prev = nval;
780 elem->next = nval;
781 }
782
783 static int
784 pass_linear_scan(struct nv_pc_pass *ctx, int iter)
785 {
786 struct nv_instruction *i;
787 struct register_set f, free;
788 int k, n;
789 struct nv_value *cur, *val, *tmp[2];
790 struct nv_value active, inactive, handled, unhandled;
791
792 make_empty_list(&active);
793 make_empty_list(&inactive);
794 make_empty_list(&handled);
795 make_empty_list(&unhandled);
796
797 nv50_ctor_register_set(ctx->pc, &free);
798
799 /* joined values should have range = NULL and thus not be added;
800 * also, fixed memory values won't be added because they're not
801 * def'd, just used
802 */
803 for (n = 0; n < ctx->num_insns; ++n) {
804 i = ctx->insns[n];
805
806 for (k = 0; k < 4; ++k) {
807 if (i->def[k] && i->def[k]->livei)
808 insert_ordered_tail(&unhandled, i->def[k]);
809 else
810 if (0 && i->def[k])
811 debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
812 }
813 if (i->flags_def && i->flags_def->livei)
814 insert_ordered_tail(&unhandled, i->flags_def);
815 }
816
817 for (val = unhandled.next; val != unhandled.prev; val = val->next) {
818 assert(val->join == val);
819 assert(val->livei->bgn <= val->next->livei->bgn);
820 }
821
822 foreach_s(cur, tmp[0], &unhandled) {
823 remove_from_list(cur);
824
825 foreach_s(val, tmp[1], &active) {
826 if (livei_end(val) <= cur->livei->bgn) {
827 reg_release(&free, val);
828 move_to_head(&handled, val);
829 } else
830 if (!livei_contains(val, cur->livei->bgn)) {
831 reg_release(&free, val);
832 move_to_head(&inactive, val);
833 }
834 }
835
836 foreach_s(val, tmp[1], &inactive) {
837 if (livei_end(val) <= cur->livei->bgn)
838 move_to_head(&handled, val);
839 else
840 if (livei_contains(val, cur->livei->bgn)) {
841 reg_occupy(&free, val);
842 move_to_head(&active, val);
843 }
844 }
845
846 f = free;
847
848 foreach(val, &inactive)
849 if (livei_have_overlap(val, cur))
850 reg_occupy(&f, val);
851
852 foreach(val, &unhandled)
853 if (val->reg.id >= 0 && livei_have_overlap(val, cur))
854 reg_occupy(&f, val);
855
856 if (cur->reg.id < 0) {
857 boolean mem = FALSE;
858
859 if (nv_is_vector_op(cur->insn->opcode))
860 mem = !reg_assign(&f, &cur->insn->def[0], 4);
861 else
862 if (iter)
863 mem = !reg_assign(&f, &cur, 1);
864
865 if (mem) {
866 NOUVEAU_ERR("out of registers\n");
867 abort();
868 }
869 }
870 insert_at_head(&active, cur);
871 reg_occupy(&free, cur);
872 }
873
874 return 0;
875 }
876
877 int
878 nv_pc_exec_pass1(struct nv_pc *pc)
879 {
880 struct nv_pc_pass *ctx;
881 int i, ret;
882
883 NV50_DBGMSG("REGISTER ALLOCATION - entering\n");
884
885 ctx = CALLOC_STRUCT(nv_pc_pass);
886 if (!ctx)
887 return -1;
888 ctx->pc = pc;
889
890 ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
891
892 pc->pass_seq++;
893 ret = pass_generate_phi_movs(ctx, pc->root);
894 assert(!ret);
895
896 for (i = 0; i < pc->loop_nesting_bound; ++i) {
897 pc->pass_seq++;
898 ret = pass_build_live_sets(ctx, pc->root);
899 assert(!ret && "live sets");
900 if (ret) {
901 NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
902 goto out;
903 }
904 }
905
906 pc->pass_seq++;
907 nv_pc_pass_in_order(pc->root, pass_order_instructions, ctx);
908
909 pc->pass_seq++;
910 ret = pass_build_intervals(ctx, pc->root);
911 assert(!ret && "build intervals");
912 if (ret) {
913 NOUVEAU_ERR("failed to build live intervals\n");
914 goto out;
915 }
916
917 #ifdef NV50_RA_DEBUG_LIVEI
918 for (i = 0; i < pc->num_values; ++i)
919 livei_print(&pc->values[i]);
920 #endif
921
922 ret = pass_join_values(ctx, 0);
923 if (ret)
924 goto out;
925 ret = pass_linear_scan(ctx, 0);
926 if (ret)
927 goto out;
928 ret = pass_join_values(ctx, 1);
929 if (ret)
930 goto out;
931 ret = pass_join_values(ctx, 2);
932 if (ret)
933 goto out;
934 ret = pass_linear_scan(ctx, 1);
935 if (ret)
936 goto out;
937
938 for (i = 0; i < pc->num_values; ++i)
939 livei_release(&pc->values[i]);
940
941 NV50_DBGMSG("REGISTER ALLOCATION - leaving\n");
942
943 out:
944 FREE(ctx);
945 return ret;
946 }