2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
// Dump helper: wraps the statement(s) q in do { ... } while (0) so that the
// expansion behaves as one statement at the call site.
30 #define RA_DUMP(q) do { q } while (0)
40 #include "sb_shader.h"
// Word type that backs the register bitmap (dta[] elements).
49 typedef uint32_t basetype
;
// Size of one bitmap word in bytes.
50 static const unsigned bt_bytes
= sizeof(basetype
);
// A bit index shifted right by this amount selects the bitmap word
// (5 == log2 of the 32 bits in a uint32_t word).
51 static const unsigned bt_index_shift
= 5;
// Mask for the low bt_index_shift bits of a bit index, i.e. the bit position
// inside the selected word.
52 static const unsigned bt_index_mask
= (1u << bt_index_shift
) - 1;
// Number of bits per bitmap word (bt_bytes * 8).
53 static const unsigned bt_bits
= bt_bytes
<< 3;
// Number of words needed to hold MAX_GPR * 4 bits; bits are addressed as
// (gpr << 2) | chan in find_free_array().
54 static const unsigned size
= MAX_GPR
* 4 / bt_bits
;
// Value-initializes the bitmap storage (dta) and records the temp-GPR count.
62 regbits(unsigned num_temps
) : dta(), num_temps(num_temps
) {}
// Records the temp-GPR count.
// NOTE(review): the constructor body that consumes `value` is not part of
// this listing.
63 regbits(unsigned num_temps
, unsigned value
) : num_temps(num_temps
)
// Takes the temp-GPR count from the shader context (alu_temp_gprs), sets
// every bit and then applies from_val_set(sh, vs).
66 regbits(shader
&sh
, val_set
&vs
) : num_temps(sh
.get_ctx().alu_temp_gprs
)
67 { set_all(1); from_val_set(sh
, vs
); }
// Fills the whole bitmap: all bits set when val is non-zero, all cleared
// when it is zero.
69 void set_all(unsigned val
);
// Walks the value set vs and processes the values that are GPRs.
70 void from_val_set(shader
&sh
, val_set
&vs
);
// Sets the bit at `index`.
72 void set(unsigned index
);
// Clears the bit at `index`.
73 void clear(unsigned index
);
// Returns whether the bit at `index` is set.
74 bool get(unsigned index
);
// Writes `val` into the bit at `index`.
76 void set(unsigned index
, unsigned val
);
// Search helpers; a set bit means "free" (see the comment above
// regbits::find_free_bit).
78 sel_chan
find_free_bit();
79 sel_chan
find_free_chans(unsigned mask
);
80 sel_chan
find_free_array(unsigned size
, unsigned mask
);
85 // =======================================
// Debug dump of the bitmap to cerr: visits all size * bt_bits bit positions
// and prints each bit as 1 or 0. A label holding i / 4 (width 3) is also
// written.
// NOTE(review): the condition that decides when the label is printed is not
// part of this listing.
87 void regbits::dump() {
88 for (unsigned i
= 0; i
< size
* bt_bits
; ++i
) {
94 cerr
<< " " << std::setw(3) << (i
/ 4) << " ";
96 cerr
<< (get(i
) ? 1 : 0);
// Fills the bitmap storage in one memset over size * bt_bytes bytes:
// every byte becomes 0xFF when v is non-zero and 0x00 when v is zero.
101 void regbits::set_all(unsigned v
) {
102 memset(&dta
, v
? 0xFF : 0x00, size
* bt_bytes
);
// Iterates over a value set and, for every value that is_any_gpr(), reads
// its final GPR through get_final_gpr().
// NOTE(review): the loop iterates `s` and tests `v`, while the parameter is
// named `vs`; the declarations of s, v and g, and the code that applies g to
// the bitmap, are not part of this listing.
105 void regbits::from_val_set(shader
&sh
, val_set
& vs
) {
108 for (val_set::iterator I
= s
.begin(sh
), E
= s
.end(sh
); I
!= E
; ++I
) {
110 if (v
->is_any_gpr()) {
111 g
= v
->get_final_gpr();
// Sets the bit at `index`.
124 void regbits::set(unsigned index
) {
// ih: index of the bitmap word that holds the bit.
125 unsigned ih
= index
>> bt_index_shift
;
// il: position of the bit inside that word.
126 unsigned il
= index
& bt_index_mask
;
127 dta
[ih
] |= ((basetype
)1u << il
);
// Clears the bit at `index`.
130 void regbits::clear(unsigned index
) {
// ih: index of the bitmap word that holds the bit.
131 unsigned ih
= index
>> bt_index_shift
;
// il: position of the bit inside that word.
132 unsigned il
= index
& bt_index_mask
;
// AND with the inverted single-bit mask leaves all other bits untouched.
134 dta
[ih
] &= ~((basetype
)1u << il
);
// Returns true when the bit at `index` is set.
137 bool regbits::get(unsigned index
) {
// ih: index of the bitmap word that holds the bit.
138 unsigned ih
= index
>> bt_index_shift
;
// il: position of the bit inside that word.
139 unsigned il
= index
& bt_index_mask
;
// The masked word is converted to bool by the return type.
140 return dta
[ih
] & ((basetype
)1u << il
);
// Writes `val` into the bit at `index`: the old bit is masked out with ~bm
// and (val << il) is OR-ed in.
// NOTE(review): val is not masked to a single bit, so a value other than
// 0 or 1 would also modify higher bits of the word -- presumably callers
// only pass 0 or 1; confirm.
143 void regbits::set(unsigned index
, unsigned val
) {
// ih: index of the bitmap word that holds the bit.
144 unsigned ih
= index
>> bt_index_shift
;
// il: position of the bit inside that word.
145 unsigned il
= index
& bt_index_mask
;
// bm: single-bit mask for the target position.
146 basetype bm
= 1u << il
;
147 dta
[ih
] = (dta
[ih
] & ~bm
) | (val
<< il
);
150 // free register for ra means the bit is set
// Looks for the lowest set bit in the bitmap: skips all-zero words, then
// combines the count of trailing zeros of the first non-zero word with the
// word's base bit index (elt << bt_index_shift).
// NOTE(review): __builtin_ctz is undefined for a zero argument; the handling
// of elt == size (no set bit at all) is not part of this listing.
// NOTE(review): the assert compares `bit`, a bit index, with
// MAX_GPR - num_temps, which find_free_array() uses as a register count
// (bits there are (gpr << 2) | chan) -- confirm the intended units.
151 sel_chan
regbits::find_free_bit() {
155 while (elt
< size
&& !dta
[elt
])
161 bit
= __builtin_ctz(dta
[elt
]) + (elt
<< bt_index_shift
);
163 assert(bit
< MAX_GPR
- num_temps
);
168 // find free gpr component to use as indirectly addressable array
// Scans registers a in [0, MAX_GPR - num_temps) and, for every channel c
// selected by `mask`, tests bit (a << 2) | c. A per-channel counter cc[c] is
// incremented on a set (free) bit; when it reaches `length` the function
// returns sel_chan(a - length + 1, c), i.e. the first register of the run
// that ends at a.
// NOTE(review): the declaration of cc, the counter reset on a cleared bit
// and the not-found return value are not part of this listing.
169 sel_chan
regbits::find_free_array(unsigned length
, unsigned mask
) {
172 // FIXME optimize this. though hopefully we won't have a lot of arrays
173 for (unsigned a
= 0; a
< MAX_GPR
- num_temps
; ++a
) {
174 for(unsigned c
= 0; c
< MAX_CHAN
; ++c
) {
175 if (mask
& (1 << c
)) {
176 if (get((a
<< 2) | c
)) {
177 if (++cc
[c
] == length
)
178 return sel_chan(a
- length
+ 1, c
);
// Searches the bitmap for a position where all channel bits requested by
// `mask` are set. From the lines shown: cd is the current word shifted down
// to the current bit offset, p is the trailing-zero count of cd rounded down
// to a multiple of 4, and a match ((cd & mask) == mask) returns the bit
// index ((elt << bt_index_shift) | bit) plus 1.
// NOTE(review): __builtin_ctz is undefined for cd == 0; whether that case is
// guarded, and how elt and bit advance, is not part of this listing.
188 sel_chan
regbits::find_free_chans(unsigned mask
) {
192 basetype cd
= dta
[elt
] >> bit
;
// Clearing the two low bits of the trailing-zero count rounds it down to a
// multiple of 4.
205 unsigned p
= __builtin_ctz(cd
) & ~(basetype
)3u;
207 if (p
> bt_bits
- bit
) {
218 if ((cd
& mask
) == mask
) {
219 return ((elt
<< bt_index_shift
) | bit
) + 1;
230 // ================================
// Walks the GPR arrays returned by sh.arrays(). From the lines shown: each
// array is reported with its size and base_gpr, preallocated arrays are
// reported as FIXED and dead arrays as DEAD, the array's interference set is
// iterated and dumped, and a base is requested from rb.find_free_array() for
// array_size registers in the channel of a->base_gpr.
// NOTE(review): the declarations of `a` and `rb`, the skip statements for
// the FIXED/DEAD cases and the use of `base` are not part of this listing.
232 void ra_init::alloc_arrays() {
234 gpr_array_vec
&ga
= sh
.arrays();
236 for(gpr_array_vec::iterator I
= ga
.begin(), E
= ga
.end(); I
!= E
; ++I
) {
240 cerr
<< "array [" << a
->array_size
<< "] at " << a
->base_gpr
<< "\n";
244 // skip preallocated arrays (e.g. with preloaded inputs)
246 RA_DUMP( cerr
<< " FIXED at " << a
->gpr
<< "\n"; );
250 bool dead
= a
->is_dead();
253 RA_DUMP( cerr
<< " DEAD\n"; );
257 val_set
&s
= a
->interferences
;
260 for (val_set::iterator I
= s
.begin(sh
), E
= s
.end(sh
); I
!= E
; ++I
) {
268 dump::dump_set(sh
, s
);
// The mask passed here has only the bit of the array's base channel set.
274 sel_chan base
= rb
.find_free_array(a
->array_size
,
275 (1 << a
->base_gpr
.chan()));
277 RA_DUMP( cerr
<< " found base: " << base
<< "\n"; );
// Walks the children of container c. Children that are containers, but not
// packed ALU nodes, are visited recursively.
// NOTE(review): the declaration of `n` and the statement executed for NT_OP
// nodes are not part of this listing.
292 void ra_init::ra_node(container_node
* c
) {
294 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
296 if (n
->type
== NT_OP
) {
299 if (n
->is_container() && !n
->is_alu_packed()) {
300 ra_node(static_cast<container_node
*>(n
));
// Handles one operation node. From the lines shown:
//  - for packed ALU nodes, every source value that is an sgpr carrying a
//    CK_PACKED_BS constraint is passed to color_bs_constraint();
//  - for fetch and CF instructions, the sgpr source values are visited;
//  - destination values are visited, and for a copy mov whose destination has
//    no constraint the matching source is looked up and its gpr is handed to
//    assign_color().
// NOTE(review): the declarations of `v`, the statement run for fetch/CF
// sources and any further conditions around assign_color() are not part of
// this listing.
305 void ra_init::process_op(node
* n
) {
307 bool copy
= n
->is_copy_mov();
310 cerr
<< "ra_init: process_op : ";
315 if (n
->is_alu_packed()) {
316 for (vvec::iterator I
= n
->src
.begin(), E
= n
->src
.end(); I
!= E
; ++I
) {
318 if (v
&& v
->is_sgpr() && v
->constraint
&&
319 v
->constraint
->kind
== CK_PACKED_BS
) {
320 color_bs_constraint(v
->constraint
);
326 if (n
->is_fetch_inst() || n
->is_cf_inst()) {
327 for (vvec::iterator I
= n
->src
.begin(), E
= n
->src
.end(); I
!= E
; ++I
) {
329 if (v
&& v
->is_sgpr())
334 for (vvec::iterator I
= n
->dst
.begin(), E
= n
->dst
.end(); I
!= E
; ++I
) {
340 if (copy
&& !v
->constraint
) {
// s is the source at the same position as the current destination.
341 value
*s
= *(n
->src
.begin() + (I
- n
->dst
.begin()));
344 assign_color(v
, s
->gpr
);
// Colors the values of constraint c (called for CK_PACKED_BS constraints
// from process_op() and color()). The constraint may hold at most 8 values.
// From the lines shown: for every live value the interference set is
// obtained (from the chunk via get_chunk_interferences(), or from the value
// itself), a per-channel counter chan_count[] is compared against 3, and
// allowed_chans (initially 0x0F, all four channels) loses a channel's bit at
// the two points shown. The candidate search runs while channels remain and
// gpr.sel() is below sh.num_nontemp_gpr(); a successful candidate is passed
// to assign_color(). A failure message and an assert end the function.
// NOTE(review): the declarations of `v` and `interf`, the branch conditions
// between the visible statements and the counter updates are not part of
// this listing.
353 void ra_init::color_bs_constraint(ra_constraint
* c
) {
354 vvec
&vv
= c
->values
;
355 assert(vv
.size() <= 8);
358 cerr
<< "color_bs_constraint: ";
363 regbits
rb(ctx
.alu_temp_gprs
);
365 unsigned chan_count
[4] = {};
366 unsigned allowed_chans
= 0x0F;
368 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
// NOTE(review): v is dereferenced here, before the `!v` test that follows.
370 sel_chan gpr
= v
->get_final_gpr();
372 if (!v
|| v
->is_dead())
378 sh
.coal
.get_chunk_interferences(v
->chunk
, interf
);
380 interf
= v
->interferences
;
383 cerr
<< " processing " << *v
<< " interferences : ";
384 dump::dump_set(sh
, interf
);
389 unsigned chan
= gpr
.chan();
390 if (chan_count
[chan
] < 3) {
394 v
->flags
&= ~VLF_FIXED
;
395 allowed_chans
&= ~(1 << chan
);
396 assert(allowed_chans
);
406 rb
.from_val_set(sh
, interf
);
409 cerr
<< " regbits : ";
414 while (allowed_chans
&& gpr
.sel() < sh
.num_nontemp_gpr()) {
416 while (rb
.get(gpr
- 1) == 0)
420 cerr
<< " trying " << gpr
<< "\n";
423 unsigned chan
= gpr
.chan();
424 if (chan_count
[chan
] < 3) {
428 vvec::iterator F
= std::find(v
->chunk
->values
.begin(),
429 v
->chunk
->values
.end(),
431 v
->chunk
->values
.erase(F
);
435 assign_color(v
, gpr
);
438 allowed_chans
&= ~(1 << chan
);
444 cerr
<< "color_bs_constraint: failed...\n";
445 assert(!"coloring failed");
// Picks a color (register/channel) for value v. From the lines shown:
//  - a value with a CK_PACKED_BS constraint is delegated to
//    color_bs_constraint();
//  - a value whose chunk is fixed is tested separately;
//  - a register-pinned value (which must also be channel-pinned) is assigned
//    its pin_gpr directly;
//  - otherwise a regbits map is built from v's interferences and searched:
//    with find_free_chans() for the pinned channel when the value is
//    channel-pinned, with find_free_bit() in the other visible branch.
// NOTE(review): the declaration of `c`, the early returns and the final
// assignment are not part of this listing.
450 void ra_init::color(value
* v
) {
452 if (v
->constraint
&& v
->constraint
->kind
== CK_PACKED_BS
) {
453 color_bs_constraint(v
->constraint
);
457 if (v
->chunk
&& v
->chunk
->is_fixed())
463 cerr
<< " interferences ";
464 dump::dump_set(sh
, v
->interferences
);
468 if (v
->is_reg_pinned()) {
469 assert(v
->is_chan_pinned());
470 assign_color(v
, v
->pin_gpr
);
474 regbits
rb(sh
, v
->interferences
);
477 if (v
->is_chan_pinned()) {
478 RA_DUMP( cerr
<< "chan_pinned = " << v
->pin_gpr
.chan() << " "; );
// Only the pinned channel is requested; the channel is added to the result.
479 unsigned mask
= 1 << v
->pin_gpr
.chan();
480 c
= rb
.find_free_chans(mask
) + v
->pin_gpr
.chan();
482 c
= rb
.find_free_bit();
// NOTE(review): the literal 128 is used here while the regbits code uses
// MAX_GPR for its limits -- confirm the two are meant to be the same value.
485 assert(c
&& c
.sel() < 128 - ctx
.alu_temp_gprs
&& "color failed");
// Assigns color c to value v; the visible line writes c to cerr.
// NOTE(review): the statements that store the color into v are not part of
// this listing.
489 void ra_init::assign_color(value
* v
, sel_chan c
) {
494 cerr
<< " to " << c
<< "\n";
498 // ===================================================
// ra_split::run, returning int.
// NOTE(review): the body lines are absent from this listing; document the
// return value once they are restored.
500 int ra_split::run() {
// For the phi nodes in container c, handles source operand number `id`:
// v refers to the phi's src[id] slot and d is the phi's dst[0]. Phis whose
// destination is not an sgpr or whose source is undefined are tested
// separately. For the others a temp value t is created and a copy mov
// t <- v is placed at `loc`, either with insert_before() or with
// push_back(); an edge between v and d with coalescer::phi_cost is added to
// the coalescer.
// NOTE(review): the declaration of `p`, the choice between the two insertion
// forms (presumably driven by `loop`) and the update of v are not part of
// this listing.
505 void ra_split::split_phi_src(container_node
*loc
, container_node
*c
,
506 unsigned id
, bool loop
) {
507 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
509 value
* &v
= p
->src
[id
], *d
= p
->dst
[0];
512 if (!d
->is_sgpr() || v
->is_undef())
515 value
*t
= sh
.create_temp_value();
517 loc
->insert_before(sh
.create_copy_mov(t
, v
));
519 loc
->push_back(sh
.create_copy_mov(t
, v
));
522 sh
.coal
.add_edge(v
, d
, coalescer::phi_cost
);
// For the phi nodes in container c, handles the destination: v refers to
// the phi's dst[0] slot, a temp value t is created and a copy mov v <- t is
// built. The copy is placed either at the front of `loc` (treated as a
// container) or right after `loc`.
// NOTE(review): the declaration of `p`, the choice between the two placement
// forms (presumably driven by `loop`) and the update of v are not part of
// this listing.
526 void ra_split::split_phi_dst(node
* loc
, container_node
*c
, bool loop
) {
527 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
529 value
* &v
= p
->dst
[0];
535 value
*t
= sh
.create_temp_value();
536 node
*cp
= sh
.create_copy_mov(v
, t
);
538 static_cast<container_node
*>(loc
)->push_front(cp
);
540 loc
->insert_after(cp
);
// For the phi nodes in container c, creates a CK_PHI constraint in the
// coalescer and adds the phi's dst[0] followed by source values to it.
// NOTE(review): the declarations of `p` and `v`, and any filtering of the
// destination or sources, are not part of this listing.
546 void ra_split::init_phi_constraints(container_node
*c
) {
547 for (node_iterator I
= c
->begin(), E
= c
->end(); I
!= E
; ++I
) {
549 ra_constraint
*cc
= sh
.coal
.create_constraint(CK_PHI
);
550 cc
->values
.push_back(p
->dst
[0]);
552 for (vvec::iterator I
= p
->src
.begin(), E
= p
->src
.end(); I
!= E
; ++I
) {
555 cc
->values
.push_back(v
);
// Recursively processes container n.
// Phi splitting depends on the node type:
//  - NT_DEPART: the sources of the target's phi at index dep_id are split;
//  - NT_REPEAT: the sources of the target's loop_phi at index rep_id are
//    split when the target has a loop_phi;
//  - NT_REGION: destinations of phi and loop_phi are split, and source 0 of
//    loop_phi is split.
// The children are then walked in reverse order; child containers are
// handled recursively. Finally, for NT_REGION nodes, init_phi_constraints()
// is invoked for phi and loop_phi.
// NOTE(review): the null checks around the region's phi/loop_phi calls, the
// declaration of `o` and the statement executed for NT_OP children are not
// part of this listing.
562 void ra_split::split(container_node
* n
) {
564 if (n
->type
== NT_DEPART
) {
565 depart_node
*d
= static_cast<depart_node
*>(n
);
567 split_phi_src(d
, d
->target
->phi
, d
->dep_id
, false);
568 } else if (n
->type
== NT_REPEAT
) {
569 repeat_node
*r
= static_cast<repeat_node
*>(n
);
570 if (r
->target
->loop_phi
)
571 split_phi_src(r
, r
->target
->loop_phi
, r
->rep_id
, true);
572 } else if (n
->type
== NT_REGION
) {
573 region_node
*r
= static_cast<region_node
*>(n
);
575 split_phi_dst(r
, r
->phi
, false);
578 split_phi_dst(r
->get_entry_code_location(), r
->loop_phi
,
580 split_phi_src(r
, r
->loop_phi
, 0, true);
// I is reassigned from N each iteration rather than incremented.
584 for (node_riterator N
, I
= n
->rbegin(), E
= n
->rend(); I
!= E
; I
= N
) {
588 if (o
->type
== NT_OP
) {
590 } else if (o
->is_container()) {
591 split(static_cast<container_node
*>(o
));
595 if (n
->type
== NT_REGION
) {
596 region_node
*r
= static_cast<region_node
*>(n
);
598 init_phi_constraints(r
->phi
);
600 init_phi_constraints(r
->loop_phi
);
// Dispatches node n to a splitting routine: NST_ALU_PACKED_INST nodes go to
// split_alu_packed(); split_vector_inst() is called in another branch.
// NOTE(review): the switch expression and the remaining case labels are not
// part of this listing.
604 void ra_split::split_op(node
* n
) {
606 case NST_ALU_PACKED_INST
:
607 split_alu_packed(static_cast<alu_packed_node
*>(n
));
611 split_vector_inst(n
);
// Splits the inputs of a packed ALU node. From the lines shown: for each
// value in vv that is a defined GPR, the value is looked up in sv; a value
// already recorded reuses the value at the same position in dv, otherwise a
// new temp value is created. Afterwards one copy mov dv[i] <- sv[i] is
// inserted before n for every recorded pair, and a CK_PACKED_BS constraint
// is created in the coalescer.
// NOTE(review): the declarations of vv, sv, dv, v and t, the bookkeeping
// that fills sv/dv and the population of the constraint are not part of this
// listing.
617 void ra_split::split_packed_ins(alu_packed_node
*n
) {
621 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
625 if (v
&& v
->is_any_gpr() && !v
->is_undef()) {
627 vvec::iterator F
= std::find(sv
.begin(), sv
.end(), v
);
631 t
= *(dv
.begin() + (F
- sv
.begin()));
633 t
= sh
.create_temp_value();
641 unsigned cnt
= sv
.size();
645 for (vvec::iterator SI
= sv
.begin(), DI
= dv
.begin(), SE
= sv
.end();
646 SI
!= SE
; ++SI
, ++DI
) {
647 n
->insert_before(sh
.create_copy_mov(*DI
, *SI
));
650 ra_constraint
*c
= sh
.coal
.create_constraint(CK_PACKED_BS
);
656 // TODO handle other packed ops for cayman
// Splitting entry point for packed ALU nodes (called from split_op()).
// NOTE(review): the body lines are absent from this listing.
657 void ra_split::split_alu_packed(alu_packed_node
* n
) {
// Walks the values of vv together with a channel counter ch. From the lines
// shown: a visited value o must not be dead; when allow_swz is set, values
// that are float 0 or 1 are tested separately, and o is looked up in v2 (the
// lookup is skipped when allow_swz is false). A value found in v2 reuses the
// value at the same position in v1; otherwise a new temp value t is created,
// flagged VLF_PIN_CHAN and pinned to sel_chan(0, ch).
// NOTE(review): the declarations of ch, o, t and F, the conditions that
// select the values to split and the updates of vv, v1 and v2 are not part
// of this listing.
668 void ra_split::split_vec(vvec
&vv
, vvec
&v1
, vvec
&v2
, bool allow_swz
) {
670 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
, ++ch
) {
676 assert(!o
->is_dead());
681 if (allow_swz
&& o
->is_float_0_or_1())
686 allow_swz
? find(v2
.begin(), v2
.end(), o
) : v2
.end();
689 t
= *(v1
.begin() + (F
- v2
.begin()));
691 t
= sh
.create_temp_value();
694 t
->flags
|= VLF_PIN_CHAN
;
695 t
->pin_gpr
= sel_chan(0, ch
);
// Splits the source and destination vectors of node n.
// Sources (skipped when n has no sources or is CF_OP_CALL_FS): the source
// list is processed in groups of four values; each group is copied into
// nsrc and passed to split_vec(), with swizzling allowed unless
// no_src_swizzle is set (CF instructions with CF_MEM, VFETCH and SEMFETCH).
// When no_src_swizzle is set or any value was split, nsrc is written back
// into n->src, copy movs tv[i] <- sv[i] are inserted before n and a
// CK_SAME_REG constraint is created.
// Destinations: n->dst is copied and passed to split_vec(); copy movs
// sv[i] <- tv[i] are inserted after `lp`. In a later loop over tv, pin flags
// are set on v and cleared on s, v->gpr and v->pin_gpr are set to `sel`, and
// a CK_SAME_REG constraint is created.
// NOTE(review): the declarations of c, lp, v, s and sel, the conditions
// around the pinning statements and the population of the constraints are
// not part of this listing.
706 void ra_split::split_vector_inst(node
* n
) {
709 bool call_fs
= n
->is_cf_op(CF_OP_CALL_FS
);
710 bool no_src_swizzle
= n
->is_cf_inst() && (n
->cf_op_flags() & CF_MEM
);
712 no_src_swizzle
|= n
->is_fetch_op(FETCH_OP_VFETCH
) ||
713 n
->is_fetch_op(FETCH_OP_SEMFETCH
);
715 if (!n
->src
.empty() && !call_fs
) {
717 // we may have more than one source vector -
718 // fetch instructions with FF_USEGRAD have gradient values in
719 // src vectors 1 (src[4-7]) and 2 (src[8-11])
721 unsigned nvec
= n
->src
.size() >> 2;
// The source count must be a multiple of four.
722 assert(nvec
<< 2 == n
->src
.size());
724 for (unsigned nv
= 0; nv
< nvec
; ++nv
) {
725 vvec sv
, tv
, nsrc(4);
726 unsigned arg_start
= nv
<< 2;
728 std::copy(n
->src
.begin() + arg_start
,
729 n
->src
.begin() + arg_start
+ 4,
732 split_vec(nsrc
, tv
, sv
, !no_src_swizzle
);
734 unsigned cnt
= sv
.size();
736 if (no_src_swizzle
|| cnt
) {
738 std::copy(nsrc
.begin(), nsrc
.end(), n
->src
.begin() + arg_start
);
740 for(unsigned i
= 0, s
= tv
.size(); i
< s
; ++i
) {
741 n
->insert_before(sh
.create_copy_mov(tv
[i
], sv
[i
]));
744 c
= sh
.coal
.create_constraint(CK_SAME_REG
);
751 if (!n
->dst
.empty()) {
752 vvec sv
, tv
, ndst
= n
->dst
;
754 split_vec(ndst
, tv
, sv
, true);
760 for(unsigned i
= 0, s
= tv
.size(); i
< s
; ++i
) {
761 lp
->insert_after(sh
.create_copy_mov(sv
[i
], tv
[i
]));
766 for (unsigned i
= 0, cnt
= tv
.size(); i
< cnt
; ++i
) {
772 v
->flags
|= VLF_PIN_REG
| VLF_PIN_CHAN
;
773 s
->flags
&= ~(VLF_PIN_REG
| VLF_PIN_CHAN
);
777 assert(s
->rel
->is_const());
778 sel
= sel_chan(s
->select
.sel() +
779 s
->rel
->get_const_value().u
,
784 v
->gpr
= v
->pin_gpr
= sel
;
788 c
= sh
.coal
.create_constraint(CK_SAME_REG
);
796 } // namespace r600_sb