/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23 #define NOUVEAU_DEBUG 1
25 /* #define NVC0_RA_DEBUG_LIVEI */
26 /* #define NVC0_RA_DEBUG_LIVE_SETS */
27 /* #define NVC0_RA_DEBUG_JOIN */
30 #include "util/u_simple_list.h"
32 #define NVC0_NUM_REGISTER_FILES 3
34 /* @unit_shift: log2 of min allocation unit for register */
36 uint32_t bits
[NVC0_NUM_REGISTER_FILES
][2];
37 uint32_t last
[NVC0_NUM_REGISTER_FILES
];
38 int log2_unit
[NVC0_NUM_REGISTER_FILES
];
/* Per-run context for this allocation pass.
 * NOTE(review): enclosing struct lines were lost in extraction; members
 * reconstructed from the uses of ctx->pc, ctx->insns and ctx->num_insns
 * visible below — confirm against the original file.
 */
struct nv_pc_pass {
   struct nv_pc *pc;
   struct nv_instruction **insns; /* all insns in serial order */
   int num_insns;
};
50 ranges_coalesce(struct nv_range
*range
)
52 while (range
->next
&& range
->end
>= range
->next
->bgn
) {
53 struct nv_range
*rnn
= range
->next
->next
;
54 assert(range
->bgn
<= range
->next
->bgn
);
55 range
->end
= MAX2(range
->end
, range
->next
->end
);
62 add_range_ex(struct nv_value
*val
, int bgn
, int end
, struct nv_range
*new_range
)
64 struct nv_range
*range
, **nextp
= &val
->livei
;
66 for (range
= val
->livei
; range
; range
= range
->next
) {
68 break; /* insert before */
70 if (bgn
> range
->end
) {
72 continue; /* insert after */
76 if (bgn
< range
->bgn
) {
80 ranges_coalesce(range
);
83 if (end
> range
->end
) {
85 ranges_coalesce(range
);
88 assert(bgn
>= range
->bgn
);
89 assert(end
<= range
->end
);
94 new_range
= CALLOC_STRUCT(nv_range
);
98 new_range
->next
= range
;
104 add_range(struct nv_value
*val
, struct nv_basic_block
*b
, int end
)
108 if (!val
->insn
) /* ignore non-def values */
110 assert(b
->entry
->serial
<= b
->exit
->serial
);
111 assert(b
->phi
->serial
<= end
);
112 assert(b
->exit
->serial
+ 1 >= end
);
114 bgn
= val
->insn
->serial
;
115 if (bgn
< b
->entry
->serial
|| bgn
> b
->exit
->serial
)
116 bgn
= b
->entry
->serial
;
120 add_range_ex(val
, bgn
, end
, NULL
);
#if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI)
/* Debug helper: print all live ranges of @a as half-open intervals. */
static void
livei_print(struct nv_value *a)
{
   struct nv_range *r = a->livei;

   debug_printf("livei %i: ", a->n);
   while (r) {
      debug_printf("[%i, %i) ", r->bgn, r->end);
      r = r->next;
   }
   debug_printf("\n");
}
#endif
139 livei_unify(struct nv_value
*dst
, struct nv_value
*src
)
141 struct nv_range
*range
, *next
;
143 for (range
= src
->livei
; range
; range
= next
) {
145 if (add_range_ex(dst
, range
->bgn
, range
->end
, range
))
152 livei_release(struct nv_value
*val
)
154 struct nv_range
*range
, *next
;
156 for (range
= val
->livei
; range
; range
= next
) {
163 livei_have_overlap(struct nv_value
*a
, struct nv_value
*b
)
165 struct nv_range
*r_a
, *r_b
;
167 for (r_a
= a
->livei
; r_a
; r_a
= r_a
->next
) {
168 for (r_b
= b
->livei
; r_b
; r_b
= r_b
->next
) {
169 if (r_b
->bgn
< r_a
->end
&&
178 livei_end(struct nv_value
*a
)
180 struct nv_range
*r
= a
->livei
;
189 livei_contains(struct nv_value
*a
, int pos
)
193 for (r
= a
->livei
; r
&& r
->bgn
<= pos
; r
= r
->next
)
200 reg_assign(struct register_set
*set
, struct nv_value
**def
, int n
)
204 int f
= def
[0]->reg
.file
;
209 s
= (k
* def
[0]->reg
.size
) >> set
->log2_unit
[f
];
214 for (i
= 0; i
* 32 < set
->last
[f
]; ++i
) {
215 if (set
->bits
[f
][i
] == 0xffffffff)
218 for (id
= 0; id
< 32; id
+= s
)
219 if (!(set
->bits
[f
][i
] & (m
<< id
)))
224 if (i
* 32 + id
> set
->last
[f
])
227 set
->bits
[f
][i
] |= m
<< id
;
231 set
->pc
->max_reg
[f
] = MAX2(set
->pc
->max_reg
[f
], id
+ s
- 1);
233 for (i
= 0; i
< n
; ++i
)
235 def
[i
]->reg
.id
= id
++;
241 reg_occupy(struct register_set
*set
, struct nv_value
*val
)
243 int id
= val
->reg
.id
, f
= val
->reg
.file
;
248 m
= (1 << (val
->reg
.size
>> set
->log2_unit
[f
])) - 1;
250 set
->bits
[f
][id
/ 32] |= m
<< (id
% 32);
252 if (set
->pc
->max_reg
[f
] < id
)
253 set
->pc
->max_reg
[f
] = id
;
257 reg_release(struct register_set
*set
, struct nv_value
*val
)
259 int id
= val
->reg
.id
, f
= val
->reg
.file
;
264 m
= (1 << (val
->reg
.size
>> set
->log2_unit
[f
])) - 1;
266 set
->bits
[f
][id
/ 32] &= ~(m
<< (id
% 32));
269 static INLINE boolean
270 join_allowed(struct nv_pc_pass
*ctx
, struct nv_value
*a
, struct nv_value
*b
)
273 struct nv_value
*val
;
275 if (a
->reg
.file
!= b
->reg
.file
|| a
->reg
.size
!= b
->reg
.size
)
278 if (a
->join
->reg
.id
== b
->join
->reg
.id
)
281 /* either a or b or both have been assigned */
283 if (a
->join
->reg
.id
>= 0 && b
->join
->reg
.id
>= 0)
286 if (b
->join
->reg
.id
>= 0) {
287 if (b
->join
->reg
.id
== 63)
293 if (a
->join
->reg
.id
== 63)
296 for (i
= 0; i
< ctx
->pc
->num_values
; ++i
) {
297 val
= &ctx
->pc
->values
[i
];
299 if (val
->join
->reg
.id
!= a
->join
->reg
.id
)
301 if (val
->join
!= a
->join
&& livei_have_overlap(val
->join
, b
->join
))
308 do_join_values(struct nv_pc_pass
*ctx
, struct nv_value
*a
, struct nv_value
*b
)
311 struct nv_value
*bjoin
= b
->join
;
313 if (b
->join
->reg
.id
>= 0)
314 a
->join
->reg
.id
= b
->join
->reg
.id
;
316 livei_unify(a
->join
, b
->join
);
318 #ifdef NVC0_RA_DEBUG_JOIN
319 debug_printf("joining %i to %i\n", b
->n
, a
->n
);
322 /* make a->join the new representative */
323 for (j
= 0; j
< ctx
->pc
->num_values
; ++j
)
324 if (ctx
->pc
->values
[j
].join
== bjoin
)
325 ctx
->pc
->values
[j
].join
= a
->join
;
327 assert(b
->join
== a
->join
);
331 try_join_values(struct nv_pc_pass
*ctx
, struct nv_value
*a
, struct nv_value
*b
)
333 if (!join_allowed(ctx
, a
, b
)) {
334 #ifdef NVC0_RA_DEBUG_JOIN
335 debug_printf("cannot join %i to %i: not allowed\n", b
->n
, a
->n
);
339 if (livei_have_overlap(a
->join
, b
->join
)) {
340 #ifdef NVC0_RA_DEBUG_JOIN
341 debug_printf("cannot join %i to %i: livei overlap\n", b
->n
, a
->n
);
348 do_join_values(ctx
, a
, b
);
351 static INLINE boolean
352 need_new_else_block(struct nv_basic_block
*b
, struct nv_basic_block
*p
)
357 if (p
->out
[i
] && !IS_LOOP_EDGE(p
->out_kind
[i
]))
360 return (b
->num_in
> 1) && (n
== 2);
363 /* Look for the @phi's operand whose definition reaches @b. */
365 phi_opnd_for_bb(struct nv_instruction
*phi
, struct nv_basic_block
*b
,
366 struct nv_basic_block
*tb
)
368 struct nv_ref
*srci
, *srcj
;
371 for (j
= -1, i
= 0; i
< 6 && phi
->src
[i
]; ++i
) {
373 /* if already replaced, check with original source first */
374 if (srci
->flags
& NV_REF_FLAG_REGALLOC_PRIV
)
375 srci
= srci
->value
->insn
->src
[0];
376 if (!nvc0_bblock_reachable_by(b
, srci
->value
->insn
->bb
, NULL
))
378 /* NOTE: back-edges are ignored by the reachable-by check */
379 if (j
< 0 || !nvc0_bblock_reachable_by(srcj
->value
->insn
->bb
,
380 srci
->value
->insn
->bb
, NULL
)) {
385 if (j
>= 0 && nvc0_bblock_reachable_by(b
, phi
->def
[0]->insn
->bb
, NULL
))
386 if (!nvc0_bblock_reachable_by(srcj
->value
->insn
->bb
,
387 phi
->def
[0]->insn
->bb
, NULL
))
392 /* For each operand of each PHI in b, generate a new value by inserting a MOV
393 * at the end of the block it is coming from and replace the operand with its
394 * result. This eliminates liveness conflicts and enables us to let values be
395 * copied to the right register if such a conflict exists nonetheless.
397 * These MOVs are also crucial in making sure the live intervals of phi srces
398 * are extended until the end of the loop, since they are not included in the
402 pass_generate_phi_movs(struct nv_pc_pass
*ctx
, struct nv_basic_block
*b
)
404 struct nv_instruction
*i
, *ni
;
405 struct nv_value
*val
;
406 struct nv_basic_block
*p
, *pn
;
409 b
->pass_seq
= ctx
->pc
->pass_seq
;
411 for (n
= 0; n
< b
->num_in
; ++n
) {
415 if (need_new_else_block(b
, p
)) {
416 pn
= new_basic_block(ctx
->pc
);
423 if (p
->exit
->target
== b
) /* target to new else-block */
424 p
->exit
->target
= pn
;
432 ctx
->pc
->current_block
= pn
;
434 for (i
= b
->phi
; i
&& i
->opcode
== NV_OP_PHI
; i
= i
->next
) {
435 j
= phi_opnd_for_bb(i
, p
, b
);
440 val
= i
->src
[j
]->value
;
441 if (i
->src
[j
]->flags
& NV_REF_FLAG_REGALLOC_PRIV
) {
443 /* use original value, we already encountered & replaced it */
444 val
= val
->insn
->src
[0]->value
;
447 if (j
< 0) /* need an additional source ? */
448 for (j
= 0; j
< 6 && i
->src
[j
] && i
->src
[j
]->value
!= val
; ++j
);
449 assert(j
< 6); /* XXX: really ugly shaders */
451 ni
= new_instruction(ctx
->pc
, NV_OP_MOV
);
452 if (ni
->prev
&& ni
->prev
->target
)
453 nvc0_insns_permute(ni
->prev
, ni
);
455 ni
->def
[0] = new_value_like(ctx
->pc
, val
);
456 ni
->def
[0]->insn
= ni
;
457 nv_reference(ctx
->pc
, ni
, 0, val
);
458 nv_reference(ctx
->pc
, i
, j
, ni
->def
[0]); /* new phi source = MOV def */
459 i
->src
[j
]->flags
|= NV_REF_FLAG_REGALLOC_PRIV
;
462 if (pn
!= p
&& pn
->exit
) {
463 ctx
->pc
->current_block
= b
->in
[n
? 0 : 1];
464 ni
= new_instruction(ctx
->pc
, NV_OP_BRA
);
470 for (j
= 0; j
< 2; ++j
)
471 if (b
->out
[j
] && b
->out
[j
]->pass_seq
< ctx
->pc
->pass_seq
)
472 pass_generate_phi_movs(ctx
, b
->out
[j
]);
478 pass_join_values(struct nv_pc_pass
*ctx
, int iter
)
482 for (n
= 0; n
< ctx
->num_insns
; ++n
) {
483 struct nv_instruction
*i
= ctx
->insns
[n
];
489 for (c
= 0; c
< 6 && i
->src
[c
]; ++c
)
490 try_join_values(ctx
, i
->def
[0], i
->src
[c
]->value
);
493 if ((iter
== 2) && i
->src
[0]->value
->insn
&&
494 !nv_is_vector_op(i
->src
[0]->value
->join
->insn
->opcode
))
495 try_join_values(ctx
, i
->def
[0], i
->src
[0]->value
);
500 for (c
= 0; c
< 6 && i
->src
[c
]; ++c
) {
501 assert(join_allowed(ctx
, i
->def
[0], i
->src
[c
]->value
));
502 do_join_values(ctx
, i
->def
[0], i
->src
[c
]->value
);
508 for (c
= 0; c
< 4 && i
->src
[c
]; ++c
)
509 do_join_values(ctx
, i
->def
[c
], i
->src
[c
]->value
);
514 case NV_OP_TXQ
: /* on nvc0, TEX src and dst can differ */
522 /* Order the instructions so that live intervals can be expressed in numbers. */
524 pass_order_instructions(void *priv
, struct nv_basic_block
*b
)
526 struct nv_pc_pass
*ctx
= (struct nv_pc_pass
*)priv
;
527 struct nv_instruction
*i
;
529 b
->pass_seq
= ctx
->pc
->pass_seq
;
531 assert(!b
->exit
|| !b
->exit
->next
);
532 for (i
= b
->phi
; i
; i
= i
->next
) {
533 i
->serial
= ctx
->num_insns
;
534 ctx
->insns
[ctx
->num_insns
++] = i
;
/* Debug helper: print the live-in value numbers of block @b.
 * Compiles to a no-op unless NVC0_RA_DEBUG_LIVE_SETS is defined.
 */
static void
bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
{
#ifdef NVC0_RA_DEBUG_LIVE_SETS
   int j;
   struct nv_value *val;

   debug_printf("LIVE-INs of BB:%i: ", b->id);

   for (j = 0; j < pc->num_values; ++j) {
      if (!(b->live_set[j / 32] & (1 << (j % 32))))
         continue;
      val = &pc->values[j];
      if (!val->insn)
         continue;
      debug_printf("%i ", val->n);
   }
   debug_printf("\n");
#endif
}
560 live_set_add(struct nv_basic_block
*b
, struct nv_value
*val
)
562 if (!val
->insn
) /* don't add non-def values */
564 b
->live_set
[val
->n
/ 32] |= 1 << (val
->n
% 32);
568 live_set_rem(struct nv_basic_block
*b
, struct nv_value
*val
)
570 b
->live_set
[val
->n
/ 32] &= ~(1 << (val
->n
% 32));
573 static INLINE boolean
574 live_set_test(struct nv_basic_block
*b
, struct nv_ref
*ref
)
576 int n
= ref
->value
->n
;
577 return b
->live_set
[n
/ 32] & (1 << (n
% 32));
580 /* The live set of a block contains those values that are live immediately
581 * before the beginning of the block, so do a backwards scan.
584 pass_build_live_sets(struct nv_pc_pass
*ctx
, struct nv_basic_block
*b
)
586 struct nv_instruction
*i
;
589 if (b
->pass_seq
>= ctx
->pc
->pass_seq
)
591 b
->pass_seq
= ctx
->pc
->pass_seq
;
593 /* slight hack for undecidedness: set phi = entry if it's undefined */
597 for (n
= 0; n
< 2; ++n
) {
598 if (!b
->out
[n
] || b
->out
[n
] == b
)
600 ret
= pass_build_live_sets(ctx
, b
->out
[n
]);
605 for (j
= 0; j
< (ctx
->pc
->num_values
+ 31) / 32; ++j
)
606 b
->live_set
[j
] = b
->out
[n
]->live_set
[j
];
608 for (j
= 0; j
< (ctx
->pc
->num_values
+ 31) / 32; ++j
)
609 b
->live_set
[j
] |= b
->out
[n
]->live_set
[j
];
616 bb_live_set_print(ctx
->pc
, b
);
618 for (i
= b
->exit
; i
!= b
->entry
->prev
; i
= i
->prev
) {
619 for (j
= 0; j
< 5 && i
->def
[j
]; j
++)
620 live_set_rem(b
, i
->def
[j
]);
621 for (j
= 0; j
< 6 && i
->src
[j
]; j
++)
622 live_set_add(b
, i
->src
[j
]->value
);
624 for (i
= b
->phi
; i
&& i
->opcode
== NV_OP_PHI
; i
= i
->next
)
625 live_set_rem(b
, i
->def
[0]);
627 bb_live_set_print(ctx
->pc
, b
);
632 static void collect_live_values(struct nv_basic_block
*b
, const int n
)
636 /* XXX: what to do about back/fake-edges (used to include both here) ? */
637 if (b
->out
[0] && b
->out_kind
[0] != CFG_EDGE_FAKE
) {
638 if (b
->out
[1] && b
->out_kind
[1] != CFG_EDGE_FAKE
) {
639 for (i
= 0; i
< n
; ++i
)
640 b
->live_set
[i
] = b
->out
[0]->live_set
[i
] | b
->out
[1]->live_set
[i
];
642 memcpy(b
->live_set
, b
->out
[0]->live_set
, n
* sizeof(uint32_t));
645 if (b
->out
[1] && b
->out_kind
[1] != CFG_EDGE_FAKE
) {
646 memcpy(b
->live_set
, b
->out
[1]->live_set
, n
* sizeof(uint32_t));
648 memset(b
->live_set
, 0, n
* sizeof(uint32_t));
652 /* NOTE: the live intervals of phi functions start at the first non-phi insn. */
654 pass_build_intervals(struct nv_pc_pass
*ctx
, struct nv_basic_block
*b
)
656 struct nv_instruction
*i
, *i_stop
;
658 const int n
= (ctx
->pc
->num_values
+ 31) / 32;
660 /* verify that first block does not have live-in values */
662 for (j
= 0; j
< n
; ++j
)
663 assert(b
->live_set
[j
] == 0);
665 collect_live_values(b
, n
);
667 /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
668 for (j
= 0; j
< 2; ++j
) {
669 if (!b
->out
[j
] || !b
->out
[j
]->phi
)
671 for (i
= b
->out
[j
]->phi
; i
->opcode
== NV_OP_PHI
; i
= i
->next
) {
672 live_set_rem(b
, i
->def
[0]);
674 for (s
= 0; s
< 6 && i
->src
[s
]; ++s
) {
675 assert(i
->src
[s
]->value
->insn
);
676 if (nvc0_bblock_reachable_by(b
, i
->src
[s
]->value
->insn
->bb
,
678 live_set_add(b
, i
->src
[s
]->value
);
680 live_set_rem(b
, i
->src
[s
]->value
);
685 /* remaining live-outs are live until the end */
687 for (j
= 0; j
< ctx
->pc
->num_values
; ++j
) {
688 if (!(b
->live_set
[j
/ 32] & (1 << (j
% 32))))
690 add_range(&ctx
->pc
->values
[j
], b
, b
->exit
->serial
+ 1);
691 #ifdef NVC0_RA_DEBUG_LIVEI
692 debug_printf("adding range for live value %i: ", j
);
693 livei_print(&ctx
->pc
->values
[j
]);
698 i_stop
= b
->entry
? b
->entry
->prev
: NULL
;
700 /* don't have to include phi functions here (will have 0 live range) */
701 for (i
= b
->exit
; i
!= i_stop
; i
= i
->prev
) {
702 assert(i
->serial
>= b
->phi
->serial
&& i
->serial
<= b
->exit
->serial
);
703 for (j
= 0; j
< 4 && i
->def
[j
]; ++j
)
704 live_set_rem(b
, i
->def
[j
]);
706 for (j
= 0; j
< 6 && i
->src
[j
]; ++j
) {
707 if (!live_set_test(b
, i
->src
[j
])) {
708 live_set_add(b
, i
->src
[j
]->value
);
709 add_range(i
->src
[j
]->value
, b
, i
->serial
);
710 #ifdef NVC0_RA_DEBUG_LIVEI
711 debug_printf("adding range for source %i (ends living): ",
712 i
->src
[j
]->value
->n
);
713 livei_print(i
->src
[j
]->value
);
719 b
->pass_seq
= ctx
->pc
->pass_seq
;
721 if (b
->out
[0] && b
->out
[0]->pass_seq
< ctx
->pc
->pass_seq
)
722 pass_build_intervals(ctx
, b
->out
[0]);
724 if (b
->out
[1] && b
->out
[1]->pass_seq
< ctx
->pc
->pass_seq
)
725 pass_build_intervals(ctx
, b
->out
[1]);
731 nvc0_ctor_register_set(struct nv_pc
*pc
, struct register_set
*set
)
733 memset(set
, 0, sizeof(*set
));
735 set
->last
[NV_FILE_GPR
] = 62;
736 set
->last
[NV_FILE_PRED
] = 6;
737 set
->last
[NV_FILE_COND
] = 1;
739 set
->log2_unit
[NV_FILE_GPR
] = 2;
740 set
->log2_unit
[NV_FILE_COND
] = 0;
741 set
->log2_unit
[NV_FILE_PRED
] = 0;
746 /* We allocate registers for all defs of a vector instruction at once.
747 * Since we'll encounter all of them in the allocation loop, do the allocation
748 * when we're at the one with the live range that starts latest.
751 is_best_representative(struct nv_value
*val
)
753 struct nv_instruction
*nvi
= val
->insn
;
755 for (i
= 0; i
< 4 && val
->insn
->def
[i
]; ++i
)
756 if (nvi
->def
[i
]->livei
&& nvi
->def
[i
]->livei
->bgn
> val
->livei
->bgn
)
762 insert_ordered_tail(struct nv_value
*list
, struct nv_value
*nval
)
764 struct nv_value
*elem
;
766 for (elem
= list
->prev
;
767 elem
!= list
&& elem
->livei
->bgn
> nval
->livei
->bgn
;
769 /* now elem begins before or at the same time as val */
772 nval
->next
= elem
->next
;
773 elem
->next
->prev
= nval
;
778 pass_linear_scan(struct nv_pc_pass
*ctx
, int iter
)
780 struct nv_instruction
*i
;
781 struct register_set f
, free
;
783 struct nv_value
*cur
, *val
, *tmp
[2];
784 struct nv_value active
, inactive
, handled
, unhandled
;
786 make_empty_list(&active
);
787 make_empty_list(&inactive
);
788 make_empty_list(&handled
);
789 make_empty_list(&unhandled
);
791 nvc0_ctor_register_set(ctx
->pc
, &free
);
793 /* joined values should have range = NULL and thus not be added;
794 * also, fixed memory values won't be added because they're not
797 for (n
= 0; n
< ctx
->num_insns
; ++n
) {
800 for (k
= 0; k
< 5; ++k
) {
801 if (i
->def
[k
] && i
->def
[k
]->livei
)
802 insert_ordered_tail(&unhandled
, i
->def
[k
]);
805 debug_printf("skipping def'd value %i: no livei\n", i
->def
[k
]->n
);
809 for (val
= unhandled
.next
; val
!= unhandled
.prev
; val
= val
->next
) {
810 assert(val
->join
== val
);
811 assert(val
->livei
->bgn
<= val
->next
->livei
->bgn
);
814 foreach_s(cur
, tmp
[0], &unhandled
) {
815 remove_from_list(cur
);
817 foreach_s(val
, tmp
[1], &active
) {
818 if (livei_end(val
) <= cur
->livei
->bgn
) {
819 reg_release(&free
, val
);
820 move_to_head(&handled
, val
);
822 if (!livei_contains(val
, cur
->livei
->bgn
)) {
823 reg_release(&free
, val
);
824 move_to_head(&inactive
, val
);
828 foreach_s(val
, tmp
[1], &inactive
) {
829 if (livei_end(val
) <= cur
->livei
->bgn
)
830 move_to_head(&handled
, val
);
832 if (livei_contains(val
, cur
->livei
->bgn
)) {
833 reg_occupy(&free
, val
);
834 move_to_head(&active
, val
);
840 foreach(val
, &inactive
)
841 if (livei_have_overlap(val
, cur
))
844 foreach(val
, &unhandled
)
845 if (val
->reg
.id
>= 0 && livei_have_overlap(val
, cur
))
848 if (cur
->reg
.id
< 0) {
850 int v
= nvi_vector_size(cur
->insn
);
853 if (is_best_representative(cur
))
854 mem
= !reg_assign(&f
, &cur
->insn
->def
[0], v
);
857 mem
= !reg_assign(&f
, &cur
, 1);
861 NOUVEAU_ERR("out of registers\n");
865 insert_at_head(&active
, cur
);
866 reg_occupy(&free
, cur
);
873 nv_pc_pass1(struct nv_pc
*pc
, struct nv_basic_block
*root
)
875 struct nv_pc_pass
*ctx
;
878 NOUVEAU_DBG("REGISTER ALLOCATION - entering\n");
880 ctx
= CALLOC_STRUCT(nv_pc_pass
);
885 ctx
->insns
= CALLOC(NV_PC_MAX_INSTRUCTIONS
, sizeof(struct nv_instruction
*));
892 ret
= pass_generate_phi_movs(ctx
, root
);
895 #ifdef NVC0_RA_DEBUG_LIVEI
896 nvc0_print_function(root
);
899 for (i
= 0; i
< pc
->loop_nesting_bound
; ++i
) {
901 ret
= pass_build_live_sets(ctx
, root
);
902 assert(!ret
&& "live sets");
904 NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i
);
910 nvc0_pc_pass_in_order(root
, pass_order_instructions
, ctx
);
913 ret
= pass_build_intervals(ctx
, root
);
914 assert(!ret
&& "build intervals");
916 NOUVEAU_ERR("failed to build live intervals\n");
920 #ifdef NVC0_RA_DEBUG_LIVEI
921 for (i
= 0; i
< pc
->num_values
; ++i
)
922 livei_print(&pc
->values
[i
]);
925 ret
= pass_join_values(ctx
, 0);
928 ret
= pass_linear_scan(ctx
, 0);
931 ret
= pass_join_values(ctx
, 1);
934 ret
= pass_join_values(ctx
, 2);
937 ret
= pass_linear_scan(ctx
, 1);
941 for (i
= 0; i
< pc
->num_values
; ++i
)
942 livei_release(&pc
->values
[i
]);
944 NOUVEAU_DBG("REGISTER ALLOCATION - leaving\n");
953 nvc0_pc_exec_pass1(struct nv_pc
*pc
)
957 for (i
= 0; i
< pc
->num_subroutines
+ 1; ++i
)
958 if (pc
->root
[i
] && (ret
= nv_pc_pass1(pc
, pc
->root
[i
])))