[mesa.git] / src / gallium / drivers / nvc0 / nvc0_pc_regalloc.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #define NOUVEAU_DEBUG 1
24
25 /* #define NVC0_RA_DEBUG_LIVEI */
26 /* #define NVC0_RA_DEBUG_LIVE_SETS */
27 /* #define NVC0_RA_DEBUG_JOIN */
28
29 #include "nvc0_pc.h"
30 #include "util/u_simple_list.h"
31
32 #define NVC0_NUM_REGISTER_FILES 3
33
34 /* @log2_unit: log2 of the minimum allocation unit of a register file */
35 struct register_set {
36 uint32_t bits[NVC0_NUM_REGISTER_FILES][2];
37 uint32_t last[NVC0_NUM_REGISTER_FILES];
38 int log2_unit[NVC0_NUM_REGISTER_FILES];
39 struct nv_pc *pc;
40 };
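/* Each register file is tracked as a bitmask of allocation units: bits[f]
 * marks occupied units, last[f] is the highest starting unit reg_assign will
 * hand out, and log2_unit[f] converts a value's size in bytes into units.
 * With log2_unit[NV_FILE_GPR] == 2, a 4-byte GPR value occupies exactly one
 * bit, and a contiguous vec4 allocation occupies a 0xf mask shifted to its
 * starting register (see reg_assign below).
 */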
41
42 struct nv_pc_pass {
43 struct nv_pc *pc;
44 struct nv_instruction **insns;
45 uint num_insns;
46 uint pass_seq;
47 };
48
49 static void
50 ranges_coalesce(struct nv_range *range)
51 {
52 while (range->next && range->end >= range->next->bgn) {
53 struct nv_range *rnn = range->next->next;
54 assert(range->bgn <= range->next->bgn);
55 range->end = MAX2(range->end, range->next->end);
56 FREE(range->next);
57 range->next = rnn;
58 }
59 }
60
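/* Insert the range [bgn, end) into @val's interval list, merging it into an
 * existing range where they touch or overlap. Returns TRUE if it was merged
 * (so a caller-supplied @new_range is left unused and may be freed, as
 * livei_unify does) and FALSE if @new_range was linked into the list.
 */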
61 static boolean
62 add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
63 {
64 struct nv_range *range, **nextp = &val->livei;
65
66 for (range = val->livei; range; range = range->next) {
67 if (end < range->bgn)
68 break; /* insert before */
69
70 if (bgn > range->end) {
71 nextp = &range->next;
72 continue; /* insert after */
73 }
74
75 /* overlap */
76 if (bgn < range->bgn) {
77 range->bgn = bgn;
78 if (end > range->end)
79 range->end = end;
80 ranges_coalesce(range);
81 return TRUE;
82 }
83 if (end > range->end) {
84 range->end = end;
85 ranges_coalesce(range);
86 return TRUE;
87 }
88 assert(bgn >= range->bgn);
89 assert(end <= range->end);
90 return TRUE;
91 }
92
93 if (!new_range)
94 new_range = CALLOC_STRUCT(nv_range);
95
96 new_range->bgn = bgn;
97 new_range->end = end;
98 new_range->next = range;
99 *(nextp) = new_range;
100 return FALSE;
101 }
102
103 static void
104 add_range(struct nv_value *val, struct nv_basic_block *b, int end)
105 {
106 int bgn;
107
108 if (!val->insn) /* ignore non-def values */
109 return;
110 assert(b->entry->serial <= b->exit->serial);
111 assert(b->phi->serial <= end);
112 assert(b->exit->serial + 1 >= end);
113
114 bgn = val->insn->serial;
115 if (bgn < b->entry->serial || bgn > b->exit->serial)
116 bgn = b->entry->serial;
117
118 assert(bgn <= end);
119
120 add_range_ex(val, bgn, end, NULL);
121 }
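/* Live ranges are half-open intervals [bgn, end) over instruction serial
 * numbers (see livei_print / livei_contains), which is why the overlap and
 * containment tests below compare 'end' strictly.
 */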
122
123 #if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI)
124 static void
125 livei_print(struct nv_value *a)
126 {
127 struct nv_range *r = a->livei;
128
129 debug_printf("livei %i: ", a->n);
130 while (r) {
131 debug_printf("[%i, %i) ", r->bgn, r->end);
132 r = r->next;
133 }
134 debug_printf("\n");
135 }
136 #endif
137
138 static void
139 livei_unify(struct nv_value *dst, struct nv_value *src)
140 {
141 struct nv_range *range, *next;
142
143 for (range = src->livei; range; range = next) {
144 next = range->next;
145 if (add_range_ex(dst, range->bgn, range->end, range))
146 FREE(range);
147 }
148 src->livei = NULL;
149 }
150
151 static void
152 livei_release(struct nv_value *val)
153 {
154 struct nv_range *range, *next;
155
156 for (range = val->livei; range; range = next) {
157 next = range->next;
158 FREE(range);
159 }
160 }
161
162 static boolean
163 livei_have_overlap(struct nv_value *a, struct nv_value *b)
164 {
165 struct nv_range *r_a, *r_b;
166
167 for (r_a = a->livei; r_a; r_a = r_a->next) {
168 for (r_b = b->livei; r_b; r_b = r_b->next) {
169 if (r_b->bgn < r_a->end &&
170 r_b->end > r_a->bgn)
171 return TRUE;
172 }
173 }
174 return FALSE;
175 }
176
177 static int
178 livei_end(struct nv_value *a)
179 {
180 struct nv_range *r = a->livei;
181
182 assert(r);
183 while (r->next)
184 r = r->next;
185 return r->end;
186 }
187
188 static boolean
189 livei_contains(struct nv_value *a, int pos)
190 {
191 struct nv_range *r;
192
193 for (r = a->livei; r && r->bgn <= pos; r = r->next)
194 if (r->end > pos)
195 return TRUE;
196 return FALSE;
197 }
198
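/* Pick a free, naturally aligned run of allocation units for the @n defs of
 * an instruction. Three defs are padded to four so a vec3 gets vec4
 * alignment; e.g. for a vec4 of 4-byte GPRs, s == 4 and m == 0xf, and the
 * mask is slid across bits[f] in steps of s until a free slot is found.
 * Consecutive ids are then assigned, one per def that has a live interval.
 */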
199 static boolean
200 reg_assign(struct register_set *set, struct nv_value **def, int n)
201 {
202 int i, id, s, k;
203 uint32_t m;
204 int f = def[0]->reg.file;
205
206 k = n;
207 if (k == 3)
208 k = 4;
209 s = (k * def[0]->reg.size) >> set->log2_unit[f];
210 m = (1 << s) - 1;
211
212 id = set->last[f];
213
214 for (i = 0; i * 32 < set->last[f]; ++i) {
215 if (set->bits[f][i] == 0xffffffff)
216 continue;
217
218 for (id = 0; id < 32; id += s)
219 if (!(set->bits[f][i] & (m << id)))
220 break;
221 if (id < 32)
222 break;
223 }
224 if (i * 32 + id > set->last[f])
225 return FALSE;
226
227 set->bits[f][i] |= m << id;
228
229 id += i * 32;
230
231 set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
232
233 for (i = 0; i < n; ++i)
234 if (def[i]->livei)
235 def[i]->reg.id = id++;
236
237 return TRUE;
238 }
239
240 static INLINE void
241 reg_occupy(struct register_set *set, struct nv_value *val)
242 {
243 int id = val->reg.id, f = val->reg.file;
244 uint32_t m;
245
246 if (id < 0)
247 return;
248 m = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
249
250 set->bits[f][id / 32] |= m << (id % 32);
251
252 if (set->pc->max_reg[f] < id)
253 set->pc->max_reg[f] = id;
254 }
255
256 static INLINE void
257 reg_release(struct register_set *set, struct nv_value *val)
258 {
259 int id = val->reg.id, f = val->reg.file;
260 uint32_t m;
261
262 if (id < 0)
263 return;
264 m = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
265
266 set->bits[f][id / 32] &= ~(m << (id % 32));
267 }
268
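/* Two values may share a register only if they live in the same file with
 * the same size, and either their representatives already have equal ids
 * (including both being unassigned) or the unassigned one can adopt the
 * other's id without overlapping any value that already holds that id.
 * An id of 63 is never adopted: GPR 63 lies outside the allocatable range
 * (last[NV_FILE_GPR] == 62 in nvc0_ctor_register_set) and is presumably
 * reserved.
 */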
269 static INLINE boolean
270 join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
271 {
272 int i;
273 struct nv_value *val;
274
275 if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
276 return FALSE;
277
278 if (a->join->reg.id == b->join->reg.id)
279 return TRUE;
280
281 /* either a or b or both have been assigned */
282
283 if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
284 return FALSE;
285 else
286 if (b->join->reg.id >= 0) {
287 if (b->join->reg.id == 63)
288 return FALSE;
289 val = a;
290 a = b;
291 b = val;
292 } else
293 if (a->join->reg.id == 63)
294 return FALSE;
295
296 for (i = 0; i < ctx->pc->num_values; ++i) {
297 val = &ctx->pc->values[i];
298
299 if (val->join->reg.id != a->join->reg.id)
300 continue;
301 if (val->join != a->join && livei_have_overlap(val->join, b->join))
302 return FALSE;
303 }
304 return TRUE;
305 }
306
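/* Merge b's join class into a's: a->join stays the representative, inherits
 * b's register if b was already colored, and takes over the unified live
 * intervals; every value that pointed at b's representative is redirected so
 * that all members always point directly at the representative (no chains).
 */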
307 static INLINE void
308 do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
309 {
310 int j;
311 struct nv_value *bjoin = b->join;
312
313 if (b->join->reg.id >= 0)
314 a->join->reg.id = b->join->reg.id;
315
316 livei_unify(a->join, b->join);
317
318 #ifdef NVC0_RA_DEBUG_JOIN
319 debug_printf("joining %i to %i\n", b->n, a->n);
320 #endif
321
322 /* make a->join the new representative */
323 for (j = 0; j < ctx->pc->num_values; ++j)
324 if (ctx->pc->values[j].join == bjoin)
325 ctx->pc->values[j].join = a->join;
326
327 assert(b->join == a->join);
328 }
329
330 static INLINE void
331 try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
332 {
333 if (!join_allowed(ctx, a, b)) {
334 #ifdef NVC0_RA_DEBUG_JOIN
335 debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
336 #endif
337 return;
338 }
339 if (livei_have_overlap(a->join, b->join)) {
340 #ifdef NVC0_RA_DEBUG_JOIN
341 debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
342 livei_print(a);
343 livei_print(b);
344 #endif
345 return;
346 }
347
348 do_join_values(ctx, a, b);
349 }
350
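/* When @b has several predecessors and predecessor @p has two non-loop
 * outgoing edges of its own, the edge p->b is critical: a MOV appended to
 * @p would also execute on p's other path. pass_generate_phi_movs therefore
 * splits such an edge with a fresh block that leads only to @b.
 */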
351 static INLINE boolean
352 need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
353 {
354 int i = 0, n = 0;
355
356 for (; i < 2; ++i)
357 if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))
358 ++n;
359
360 return (b->num_in > 1) && (n == 2);
361 }
362
363 /* Look for the @phi's operand whose definition reaches @b. */
364 static int
365 phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
366 struct nv_basic_block *tb)
367 {
368 struct nv_ref *srci, *srcj;
369 int i, j;
370
371 for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
372 srci = phi->src[i];
373 /* if already replaced, check with original source first */
374 if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV)
375 srci = srci->value->insn->src[0];
376 if (!nvc0_bblock_reachable_by(b, srci->value->insn->bb, NULL))
377 continue;
378 /* NOTE: back-edges are ignored by the reachable-by check */
379 if (j < 0 || !nvc0_bblock_reachable_by(srcj->value->insn->bb,
380 srci->value->insn->bb, NULL)) {
381 j = i;
382 srcj = srci;
383 }
384 }
385 if (j >= 0 && nvc0_bblock_reachable_by(b, phi->def[0]->insn->bb, NULL))
386 if (!nvc0_bblock_reachable_by(srcj->value->insn->bb,
387 phi->def[0]->insn->bb, NULL))
388 j = -1;
389 return j;
390 }
391
392 /* For each operand of each PHI in @b, generate a new value by inserting a MOV
393 * at the end of the predecessor block the operand comes from, and replace the
394 * operand with the MOV's result. This eliminates liveness conflicts and, where
395 * a conflict remains anyway, lets the value be copied into the required register.
396 *
397 * These MOVs are also crucial in making sure the live intervals of phi sources
398 * are extended until the end of the loop, since phi sources are not included
399 * in the live-in sets.
400 */
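/* For example, for  y = phi(a from BB1, b from BB2)  a MOV a' = a is
 * appended to BB1 and a MOV b' = b to BB2 (splitting a new else-block off
 * first where needed, see need_new_else_block), and the phi becomes
 * y = phi(a', b'). Replaced sources are tagged NV_REF_FLAG_REGALLOC_PRIV so
 * that a second visit recognizes them and falls back to the original value.
 */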
401 static int
402 pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
403 {
404 struct nv_instruction *i, *ni;
405 struct nv_value *val;
406 struct nv_basic_block *p, *pn;
407 int n, j;
408
409 b->pass_seq = ctx->pc->pass_seq;
410
411 for (n = 0; n < b->num_in; ++n) {
412 p = pn = b->in[n];
413 assert(p);
414
415 if (need_new_else_block(b, p)) {
416 pn = new_basic_block(ctx->pc);
417
418 if (p->out[0] == b)
419 p->out[0] = pn;
420 else
421 p->out[1] = pn;
422
423 if (p->exit->target == b) /* target to new else-block */
424 p->exit->target = pn;
425
426 b->in[n] = pn;
427
428 pn->out[0] = b;
429 pn->in[0] = p;
430 pn->num_in = 1;
431 }
432 ctx->pc->current_block = pn;
433
434 for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
435 j = phi_opnd_for_bb(i, p, b);
436
437 if (j < 0) {
438 val = i->def[0];
439 } else {
440 val = i->src[j]->value;
441 if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) {
442 j = -1;
443 /* use original value, we already encountered & replaced it */
444 val = val->insn->src[0]->value;
445 }
446 }
447 if (j < 0) /* need an additional source ? */
448 for (j = 0; j < 6 && i->src[j] && i->src[j]->value != val; ++j);
449 assert(j < 6); /* XXX: really ugly shaders */
450
451 ni = new_instruction(ctx->pc, NV_OP_MOV);
452 if (ni->prev && ni->prev->target)
453 nvc0_insns_permute(ni->prev, ni);
454
455 ni->def[0] = new_value_like(ctx->pc, val);
456 ni->def[0]->insn = ni;
457 nv_reference(ctx->pc, ni, 0, val);
458 nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */
459 i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV;
460 }
461
462 if (pn != p && pn->exit) {
463 ctx->pc->current_block = b->in[n ? 0 : 1];
464 ni = new_instruction(ctx->pc, NV_OP_BRA);
465 ni->target = b;
466 ni->terminator = 1;
467 }
468 }
469
470 for (j = 0; j < 2; ++j)
471 if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
472 pass_generate_phi_movs(ctx, b->out[j]);
473
474 return 0;
475 }
476
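/* Invoked with @iter = 0, 1 and 2 (see nv_pc_pass1): iteration 0
 * unconditionally joins the defs of BIND (vector) instructions with their
 * sources, iteration 1 does the same for SELECT (asserting the join is
 * allowed); iteration 2 additionally tries to coalesce PHI and MOV copies
 * (MOVs only when the source does not come from a vector op), which is
 * best-effort and backs off when the live intervals overlap.
 */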
477 static int
478 pass_join_values(struct nv_pc_pass *ctx, int iter)
479 {
480 int c, n;
481
482 for (n = 0; n < ctx->num_insns; ++n) {
483 struct nv_instruction *i = ctx->insns[n];
484
485 switch (i->opcode) {
486 case NV_OP_PHI:
487 if (iter != 2)
488 break;
489 for (c = 0; c < 6 && i->src[c]; ++c)
490 try_join_values(ctx, i->def[0], i->src[c]->value);
491 break;
492 case NV_OP_MOV:
493 if ((iter == 2) && i->src[0]->value->insn &&
494 !nv_is_vector_op(i->src[0]->value->join->insn->opcode))
495 try_join_values(ctx, i->def[0], i->src[0]->value);
496 break;
497 case NV_OP_SELECT:
498 if (iter != 1)
499 break;
500 for (c = 0; c < 6 && i->src[c]; ++c) {
501 assert(join_allowed(ctx, i->def[0], i->src[c]->value));
502 do_join_values(ctx, i->def[0], i->src[c]->value);
503 }
504 break;
505 case NV_OP_BIND:
506 if (iter)
507 break;
508 for (c = 0; c < 4 && i->src[c]; ++c)
509 do_join_values(ctx, i->def[c], i->src[c]->value);
510 break;
511 case NV_OP_TEX:
512 case NV_OP_TXB:
513 case NV_OP_TXL:
514 case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */
515 default:
516 break;
517 }
518 }
519 return 0;
520 }
521
522 /* Order the instructions so that live intervals can be expressed as ranges of instruction serials. */
523 static void
524 pass_order_instructions(void *priv, struct nv_basic_block *b)
525 {
526 struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
527 struct nv_instruction *i;
528
529 b->pass_seq = ctx->pc->pass_seq;
530
531 assert(!b->exit || !b->exit->next);
532 for (i = b->phi; i; i = i->next) {
533 i->serial = ctx->num_insns;
534 ctx->insns[ctx->num_insns++] = i;
535 }
536 }
537
538 static void
539 bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
540 {
541 #ifdef NVC0_RA_DEBUG_LIVE_SETS
542 struct nv_value *val;
543 int j;
544
545 debug_printf("LIVE-INs of BB:%i: ", b->id);
546
547 for (j = 0; j < pc->num_values; ++j) {
548 if (!(b->live_set[j / 32] & (1 << (j % 32))))
549 continue;
550 val = &pc->values[j];
551 if (!val->insn)
552 continue;
553 debug_printf("%i ", val->n);
554 }
555 debug_printf("\n");
556 #endif
557 }
558
559 static INLINE void
560 live_set_add(struct nv_basic_block *b, struct nv_value *val)
561 {
562 if (!val->insn) /* don't add non-def values */
563 return;
564 b->live_set[val->n / 32] |= 1 << (val->n % 32);
565 }
566
567 static INLINE void
568 live_set_rem(struct nv_basic_block *b, struct nv_value *val)
569 {
570 b->live_set[val->n / 32] &= ~(1 << (val->n % 32));
571 }
572
573 static INLINE boolean
574 live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
575 {
576 int n = ref->value->n;
577 return b->live_set[n / 32] & (1 << (n % 32));
578 }
579
580 /* The live set of a block contains those values that are live immediately
581 * before the beginning of the block, so do a backwards scan.
582 */
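/* In dataflow terms this computes, per block, the union of the successors'
 * live-in sets, minus the values defined in @b, plus the values used in @b,
 * by walking the instructions from b->exit back to b->entry. The block's own
 * phi defs are cleared afterwards since the walk does not visit the phi
 * instructions. nv_pc_pass1 runs this pass pc->loop_nesting_bound times so
 * that liveness can propagate around loop back edges.
 */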
583 static int
584 pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
585 {
586 struct nv_instruction *i;
587 int j, n, ret = 0;
588
589 if (b->pass_seq >= ctx->pc->pass_seq)
590 return 0;
591 b->pass_seq = ctx->pc->pass_seq;
592
593 /* slight hack: if the block has no phi instructions, let b->phi point at b->entry */
594 if (!b->phi)
595 b->phi = b->entry;
596
597 for (n = 0; n < 2; ++n) {
598 if (!b->out[n] || b->out[n] == b)
599 continue;
600 ret = pass_build_live_sets(ctx, b->out[n]);
601 if (ret)
602 return ret;
603
604 if (n == 0) {
605 for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
606 b->live_set[j] = b->out[n]->live_set[j];
607 } else {
608 for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
609 b->live_set[j] |= b->out[n]->live_set[j];
610 }
611 }
612
613 if (!b->entry)
614 return 0;
615
616 bb_live_set_print(ctx->pc, b);
617
618 for (i = b->exit; i != b->entry->prev; i = i->prev) {
619 for (j = 0; j < 5 && i->def[j]; j++)
620 live_set_rem(b, i->def[j]);
621 for (j = 0; j < 6 && i->src[j]; j++)
622 live_set_add(b, i->src[j]->value);
623 }
624 for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
625 live_set_rem(b, i->def[0]);
626
627 bb_live_set_print(ctx->pc, b);
628
629 return 0;
630 }
631
632 static void collect_live_values(struct nv_basic_block *b, const int n)
633 {
634 int i;
635
636 /* XXX: what to do about back/fake-edges (used to include both here)? */
637 if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) {
638 if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
639 for (i = 0; i < n; ++i)
640 b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
641 } else {
642 memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
643 }
644 } else
645 if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
646 memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
647 } else {
648 memset(b->live_set, 0, n * sizeof(uint32_t));
649 }
650 }
651
652 /* NOTE: the live intervals of phi functions start at the first non-phi insn. */
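/* Each block first turns its remaining live-outs into ranges reaching just
 * past b->exit, then walks its instructions backwards: a source that is not
 * yet live gets a range from its definition (or from the block entry, if it
 * was defined elsewhere) up to that use.
 */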
653 static int
654 pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
655 {
656 struct nv_instruction *i, *i_stop;
657 int j, s;
658 const int n = (ctx->pc->num_values + 31) / 32;
659
660 /* verify that first block does not have live-in values */
661 if (b->num_in == 0)
662 for (j = 0; j < n; ++j)
663 assert(b->live_set[j] == 0);
664
665 collect_live_values(b, n);
666
667 /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
668 for (j = 0; j < 2; ++j) {
669 if (!b->out[j] || !b->out[j]->phi)
670 continue;
671 for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
672 live_set_rem(b, i->def[0]);
673
674 for (s = 0; s < 6 && i->src[s]; ++s) {
675 assert(i->src[s]->value->insn);
676 if (nvc0_bblock_reachable_by(b, i->src[s]->value->insn->bb,
677 b->out[j]))
678 live_set_add(b, i->src[s]->value);
679 else
680 live_set_rem(b, i->src[s]->value);
681 }
682 }
683 }
684
685 /* remaining live-outs are live until the end */
686 if (b->exit) {
687 for (j = 0; j < ctx->pc->num_values; ++j) {
688 if (!(b->live_set[j / 32] & (1 << (j % 32))))
689 continue;
690 add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
691 #ifdef NVC0_RA_DEBUG_LIVEI
692 debug_printf("adding range for live value %i: ", j);
693 livei_print(&ctx->pc->values[j]);
694 #endif
695 }
696 }
697
698 i_stop = b->entry ? b->entry->prev : NULL;
699
700 /* don't have to include phi functions here (will have 0 live range) */
701 for (i = b->exit; i != i_stop; i = i->prev) {
702 assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
703 for (j = 0; j < 4 && i->def[j]; ++j)
704 live_set_rem(b, i->def[j]);
705
706 for (j = 0; j < 6 && i->src[j]; ++j) {
707 if (!live_set_test(b, i->src[j])) {
708 live_set_add(b, i->src[j]->value);
709 add_range(i->src[j]->value, b, i->serial);
710 #ifdef NVC0_RA_DEBUG_LIVEI
711 debug_printf("adding range for source %i (ends living): ",
712 i->src[j]->value->n);
713 livei_print(i->src[j]->value);
714 #endif
715 }
716 }
717 }
718
719 b->pass_seq = ctx->pc->pass_seq;
720
721 if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
722 pass_build_intervals(ctx, b->out[0]);
723
724 if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
725 pass_build_intervals(ctx, b->out[1]);
726
727 return 0;
728 }
729
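/* Register file limits: reg_assign will start a GPR allocation at ids 0..62
 * ($r63 is deliberately left out, see join_allowed), predicates at $p0..$p6
 * and condition codes at 0..1. The GPR unit shift of 2 makes the allocation
 * unit a 4-byte register, so larger values occupy several consecutive ids.
 */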
730 static INLINE void
731 nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set)
732 {
733 memset(set, 0, sizeof(*set));
734
735 set->last[NV_FILE_GPR] = 62;
736 set->last[NV_FILE_PRED] = 6;
737 set->last[NV_FILE_COND] = 1;
738
739 set->log2_unit[NV_FILE_GPR] = 2;
740 set->log2_unit[NV_FILE_COND] = 0;
741 set->log2_unit[NV_FILE_PRED] = 0;
742
743 set->pc = pc;
744 }
745
746 /* We allocate registers for all defs of a vector instruction at once.
747 * Since we'll encounter all of them in the allocation loop, do the allocation
748 * when we're at the one with the live range that starts latest.
749 */
750 static boolean
751 is_best_representative(struct nv_value *val)
752 {
753 struct nv_instruction *nvi = val->insn;
754 int i;
755 for (i = 0; i < 4 && val->insn->def[i]; ++i)
756 if (nvi->def[i]->livei && nvi->def[i]->livei->bgn > val->livei->bgn)
757 return FALSE;
758 return TRUE;
759 }
760
761 static void
762 insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
763 {
764 struct nv_value *elem;
765
766 for (elem = list->prev;
767 elem != list && elem->livei->bgn > nval->livei->bgn;
768 elem = elem->prev);
769 /* now elem's interval begins before or at the same time as nval's */
770
771 nval->prev = elem;
772 nval->next = elem->next;
773 elem->next->prev = nval;
774 elem->next = nval;
775 }
776
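/* Classic linear scan over the intervals built above: values are visited in
 * order of increasing interval start (the sorted 'unhandled' list). Values
 * whose interval has ended move to 'handled'; values with a lifetime hole at
 * the current position move to 'inactive' and release their register; a
 * register is then picked from a copy of the free set that additionally
 * blocks every inactive or pre-colored unhandled value overlapping the
 * current interval. Vector defs are assigned as a group at their best
 * representative; scalars only in the second round (@iter != 0). There is
 * no spilling: running out of registers aborts.
 */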
777 static int
778 pass_linear_scan(struct nv_pc_pass *ctx, int iter)
779 {
780 struct nv_instruction *i;
781 struct register_set f, free;
782 int k, n;
783 struct nv_value *cur, *val, *tmp[2];
784 struct nv_value active, inactive, handled, unhandled;
785
786 make_empty_list(&active);
787 make_empty_list(&inactive);
788 make_empty_list(&handled);
789 make_empty_list(&unhandled);
790
791 nvc0_ctor_register_set(ctx->pc, &free);
792
793 /* joined values should have range = NULL and thus not be added;
794 * also, fixed memory values won't be added because they're not
795 * def'd, just used
796 */
797 for (n = 0; n < ctx->num_insns; ++n) {
798 i = ctx->insns[n];
799
800 for (k = 0; k < 5; ++k) {
801 if (i->def[k] && i->def[k]->livei)
802 insert_ordered_tail(&unhandled, i->def[k]);
803 else
804 if (0 && i->def[k])
805 debug_printf("skipping def'd value %i: no livei\n", i->def[k]->n);
806 }
807 }
808
809 for (val = unhandled.next; val != unhandled.prev; val = val->next) {
810 assert(val->join == val);
811 assert(val->livei->bgn <= val->next->livei->bgn);
812 }
813
814 foreach_s(cur, tmp[0], &unhandled) {
815 remove_from_list(cur);
816
817 foreach_s(val, tmp[1], &active) {
818 if (livei_end(val) <= cur->livei->bgn) {
819 reg_release(&free, val);
820 move_to_head(&handled, val);
821 } else
822 if (!livei_contains(val, cur->livei->bgn)) {
823 reg_release(&free, val);
824 move_to_head(&inactive, val);
825 }
826 }
827
828 foreach_s(val, tmp[1], &inactive) {
829 if (livei_end(val) <= cur->livei->bgn)
830 move_to_head(&handled, val);
831 else
832 if (livei_contains(val, cur->livei->bgn)) {
833 reg_occupy(&free, val);
834 move_to_head(&active, val);
835 }
836 }
837
838 f = free;
839
840 foreach(val, &inactive)
841 if (livei_have_overlap(val, cur))
842 reg_occupy(&f, val);
843
844 foreach(val, &unhandled)
845 if (val->reg.id >= 0 && livei_have_overlap(val, cur))
846 reg_occupy(&f, val);
847
848 if (cur->reg.id < 0) {
849 boolean mem = FALSE;
850 int v = nvi_vector_size(cur->insn);
851
852 if (v > 1) {
853 if (is_best_representative(cur))
854 mem = !reg_assign(&f, &cur->insn->def[0], v);
855 } else {
856 if (iter)
857 mem = !reg_assign(&f, &cur, 1);
858 }
859
860 if (mem) {
861 NOUVEAU_ERR("out of registers\n");
862 abort();
863 }
864 }
865 insert_at_head(&active, cur);
866 reg_occupy(&free, cur);
867 }
868
869 return 0;
870 }
871
872 static int
873 nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
874 {
875 struct nv_pc_pass *ctx;
876 int i, ret;
877
878 NOUVEAU_DBG("REGISTER ALLOCATION - entering\n");
879
880 ctx = CALLOC_STRUCT(nv_pc_pass);
881 if (!ctx)
882 return -1;
883 ctx->pc = pc;
884
885 ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
886 if (!ctx->insns) {
887 FREE(ctx);
888 return -1;
889 }
890
891 pc->pass_seq++;
892 ret = pass_generate_phi_movs(ctx, root);
893 assert(!ret);
894
895 #ifdef NVC0_RA_DEBUG_LIVEI
896 nvc0_print_function(root);
897 #endif
898
899 for (i = 0; i < pc->loop_nesting_bound; ++i) {
900 pc->pass_seq++;
901 ret = pass_build_live_sets(ctx, root);
902 assert(!ret && "live sets");
903 if (ret) {
904 NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
905 goto out;
906 }
907 }
908
909 pc->pass_seq++;
910 nvc0_pc_pass_in_order(root, pass_order_instructions, ctx);
911
912 pc->pass_seq++;
913 ret = pass_build_intervals(ctx, root);
914 assert(!ret && "build intervals");
915 if (ret) {
916 NOUVEAU_ERR("failed to build live intervals\n");
917 goto out;
918 }
919
920 #ifdef NVC0_RA_DEBUG_LIVEI
921 for (i = 0; i < pc->num_values; ++i)
922 livei_print(&pc->values[i]);
923 #endif
924
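   /* Coalescing and allocation are interleaved: mandatory BIND joins first,
    * then a scan that only colors vector def groups (iter 0), then SELECT
    * and PHI/MOV joins against those fixed colors, and finally the full
    * scan (iter 1) that assigns everything left.
    */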
925 ret = pass_join_values(ctx, 0);
926 if (ret)
927 goto out;
928 ret = pass_linear_scan(ctx, 0);
929 if (ret)
930 goto out;
931 ret = pass_join_values(ctx, 1);
932 if (ret)
933 goto out;
934 ret = pass_join_values(ctx, 2);
935 if (ret)
936 goto out;
937 ret = pass_linear_scan(ctx, 1);
938 if (ret)
939 goto out;
940
941 for (i = 0; i < pc->num_values; ++i)
942 livei_release(&pc->values[i]);
943
944 NOUVEAU_DBG("REGISTER ALLOCATION - leaving\n");
945
946 out:
947 FREE(ctx->insns);
948 FREE(ctx);
949 return ret;
950 }
951
952 int
953 nvc0_pc_exec_pass1(struct nv_pc *pc)
954 {
955 int i, ret;
956
957 for (i = 0; i < pc->num_subroutines + 1; ++i)
958 if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
959 return ret;
960 return 0;
961 }