/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
30 #define RA_DUMP(q) do { q } while (0)
35 #include "sb_shader.h"
40 int ra_coalesce::run() {
44 void coalescer::add_edge(value
* a
, value
* b
, unsigned cost
) {
45 assert(a
->is_sgpr() && b
->is_sgpr());
46 edges
.insert(new ra_edge(a
,b
, cost
));
49 void coalescer::create_chunk(value
*v
) {
53 ra_chunk
*c
= new ra_chunk();
55 c
->values
.push_back(v
);
57 if (v
->is_chan_pinned())
58 c
->flags
|= RCF_PIN_CHAN
;
59 if (v
->is_reg_pinned()) {
60 c
->flags
|= RCF_PIN_REG
;
66 sblog
<< "create_chunk: ";
70 all_chunks
.push_back(c
);
75 void coalescer::unify_chunks(ra_edge
*e
) {
76 ra_chunk
*c1
= e
->a
->chunk
, *c2
= e
->b
->chunk
;
79 sblog
<< "unify_chunks: ";
84 if (c2
->is_chan_pinned() && !c1
->is_chan_pinned()) {
85 c1
->flags
|= RCF_PIN_CHAN
;
86 c1
->pin
= sel_chan(c1
->pin
.sel(), c2
->pin
.chan());
89 if (c2
->is_reg_pinned() && !c1
->is_reg_pinned()) {
90 c1
->flags
|= RCF_PIN_REG
;
91 c1
->pin
= sel_chan(c2
->pin
.sel(), c1
->pin
.chan());
94 c1
->values
.reserve(c1
->values
.size() + c2
->values
.size());
96 for (vvec::iterator I
= c2
->values
.begin(), E
= c2
->values
.end(); I
!= E
;
99 c1
->values
.push_back(*I
);
102 chunk_vec::iterator F
= std::find(all_chunks
.begin(), all_chunks
.end(), c2
);
103 assert(F
!= all_chunks
.end());
107 c1
->cost
+= c2
->cost
+ e
->cost
;
111 bool coalescer::chunks_interference(ra_chunk
*c1
, ra_chunk
*c2
) {
112 unsigned pin_flags
= (c1
->flags
& c2
->flags
) &
113 (RCF_PIN_CHAN
| RCF_PIN_REG
);
115 if ((pin_flags
& RCF_PIN_CHAN
) &&
116 c1
->pin
.chan() != c2
->pin
.chan())
119 if ((pin_flags
& RCF_PIN_REG
) &&
120 c1
->pin
.sel() != c2
->pin
.sel())
123 for (vvec::iterator I
= c1
->values
.begin(), E
= c1
->values
.end(); I
!= E
;
127 for (vvec::iterator I
= c2
->values
.begin(), E
= c2
->values
.end(); I
!= E
;
131 if (!v1
->v_equal(v2
) && v1
->interferences
.contains(v2
))
138 void coalescer::build_chunks() {
140 for (edge_queue::iterator I
= edges
.begin(), E
= edges
.end();
151 ra_chunk
*c1
= e
->a
->chunk
, *c2
= e
->b
->chunk
;
155 } else if (!chunks_interference(c1
, c2
))
160 ra_constraint
* coalescer::create_constraint(constraint_kind kind
) {
161 ra_constraint
*c
= new ra_constraint(kind
);
162 all_constraints
.push_back(c
);
166 void coalescer::dump_edges() {
167 sblog
<< "######## affinity edges\n";
169 for (edge_queue::iterator I
= edges
.begin(), E
= edges
.end();
172 sblog
<< " ra_edge ";
173 dump::dump_val(e
->a
);
175 dump::dump_val(e
->b
);
176 sblog
<< " cost = " << e
->cost
<< "\n";
180 void coalescer::dump_chunks() {
181 sblog
<< "######## chunks\n";
183 for (chunk_vec::iterator I
= all_chunks
.begin(), E
= all_chunks
.end();
191 void coalescer::dump_constraint_queue() {
192 sblog
<< "######## constraints\n";
194 for (constraint_queue::iterator I
= constraints
.begin(),
195 E
= constraints
.end(); I
!= E
; ++I
) {
196 ra_constraint
* c
= *I
;
201 void coalescer::dump_chunk(ra_chunk
* c
) {
202 sblog
<< " ra_chunk cost = " << c
->cost
<< " : ";
203 dump::dump_vec(c
->values
);
205 if (c
->flags
& RCF_PIN_REG
)
206 sblog
<< " REG = " << c
->pin
.sel();
208 if (c
->flags
& RCF_PIN_CHAN
)
209 sblog
<< " CHAN = " << c
->pin
.chan();
211 sblog
<< (c
->flags
& RCF_GLOBAL
? " GLOBAL" : "");
216 void coalescer::dump_constraint(ra_constraint
* c
) {
217 sblog
<< " ra_constraint: ";
219 case CK_PACKED_BS
: sblog
<< "PACKED_BS"; break;
220 case CK_PHI
: sblog
<< "PHI"; break;
221 case CK_SAME_REG
: sblog
<< "SAME_REG"; break;
222 default: sblog
<< "UNKNOWN_KIND"; assert(0); break;
225 sblog
<< " cost = " << c
->cost
<< " : ";
226 dump::dump_vec(c
->values
);
231 void coalescer::get_chunk_interferences(ra_chunk
*c
, val_set
&s
) {
233 for (vvec::iterator I
= c
->values
.begin(), E
= c
->values
.end(); I
!= E
;
236 s
.add_set(v
->interferences
);
238 s
.remove_vec(c
->values
);
241 void coalescer::build_chunk_queue() {
242 for (chunk_vec::iterator I
= all_chunks
.begin(),
243 E
= all_chunks
.end(); I
!= E
; ++I
) {
251 void coalescer::build_constraint_queue() {
252 for (constraint_vec::iterator I
= all_constraints
.begin(),
253 E
= all_constraints
.end(); I
!= E
; ++I
) {
254 ra_constraint
*c
= *I
;
257 if (c
->values
.empty() || !c
->values
.front()->is_sgpr())
260 if (c
->kind
!= CK_SAME_REG
)
263 for (vvec::iterator I
= c
->values
.begin(), E
= c
->values
.end();
269 cost
+= v
->chunk
->cost
;
272 constraints
.insert(c
);
276 void coalescer::color_chunks() {
278 for (chunk_queue::iterator I
= chunks
.begin(), E
= chunks
.end();
281 if (c
->is_fixed() || c
->values
.size() == 1)
287 get_chunk_interferences(c
, interf
);
290 sblog
<< "color_chunks: ";
292 sblog
<< "\n interferences: ";
293 dump::dump_set(sh
,interf
);
297 init_reg_bitset(rb
, interf
);
299 unsigned pass
= c
->is_reg_pinned() ? 0 : 1;
301 unsigned cs
= c
->is_chan_pinned() ? c
->pin
.chan() : 0;
302 unsigned ce
= c
->is_chan_pinned() ? cs
+ 1 : 4;
315 re
= sh
.num_nontemp_gpr();
318 for (unsigned reg
= rs
; reg
< re
; ++reg
) {
319 for (unsigned chan
= cs
; chan
< ce
; ++chan
) {
320 unsigned bit
= sel_chan(reg
, chan
);
321 if (bit
>= rb
.size() || !rb
.get(bit
)) {
337 color_chunk(c
, color
);
341 void coalescer::init_reg_bitset(sb_bitset
&bs
, val_set
&vs
) {
343 for (val_set::iterator I
= vs
.begin(sh
), E
= vs
.end(sh
); I
!= E
; ++I
) {
346 if (!v
->is_any_gpr())
349 unsigned gpr
= v
->get_final_gpr();
354 if (gpr
>= bs
.size())
361 void coalescer::color_chunk(ra_chunk
*c
, sel_chan color
) {
365 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
;
369 if (v
->is_reg_pinned() && v
->pin_gpr
.sel() != color
.sel()) {
374 if (v
->is_chan_pinned() && v
->pin_gpr
.chan() != color
.chan()) {
381 if (v
->constraint
&& v
->constraint
->kind
== CK_PHI
)
386 sblog
<< " assigned " << color
<< " to ";
394 if (c
->is_reg_pinned()) {
399 coalescer::~coalescer() {
401 // FIXME use pool allocator ??
403 for (constraint_vec::iterator I
= all_constraints
.begin(),
404 E
= all_constraints
.end(); I
!= E
; ++I
) {
408 for (chunk_vec::iterator I
= all_chunks
.begin(),
409 E
= all_chunks
.end(); I
!= E
; ++I
) {
413 for (edge_queue::iterator I
= edges
.begin(), E
= edges
.end();
419 int coalescer::run() {
422 RA_DUMP( dump_edges(); );
425 RA_DUMP( dump_chunks(); );
427 build_constraint_queue();
428 RA_DUMP( dump_constraint_queue(); );
430 if ((r
= color_constraints()))
439 void coalescer::color_phi_constraint(ra_constraint
* c
) {
442 ra_chunk
* coalescer::detach_value(value
*v
) {
444 vvec::iterator F
= std::find(v
->chunk
->values
.begin(),
445 v
->chunk
->values
.end(), v
);
447 assert(F
!= v
->chunk
->values
.end());
448 v
->chunk
->values
.erase(F
);
451 if (v
->is_reg_pinned()) {
456 sblog
<< " detached : ";
457 dump_chunk(v
->chunk
);
464 int coalescer::color_reg_constraint(ra_constraint
*c
) {
465 unsigned k
, cnt
= c
->values
.size();
466 vvec
& cv
= c
->values
;
469 unsigned swz
[4] = {0, 1, 2, 3};
473 bool reg_pinned
= false;
474 unsigned pin_reg
= ~0;
476 unsigned chan_mask
= 0;
479 for (vvec::iterator I
= cv
.begin(), E
= cv
.end(); I
!= E
; ++I
, ++k
) {
487 if (v
->chunk
->is_chan_pinned()) {
488 unsigned chan
= 1 << v
->chunk
->pin
.chan();
490 if (chan
& chan_mask
) { // channel already in use
491 ch
[k
] = detach_value(v
);
492 assert(!ch
[k
]->is_chan_pinned());
498 if (v
->chunk
->is_reg_pinned()) {
501 pin_reg
= v
->chunk
->pin
.sel();
505 get_chunk_interferences(ch
[k
], interf
[k
]);
506 init_reg_bitset(rb
[k
], interf
[k
]);
509 unsigned start_reg
, end_reg
;
512 end_reg
= sh
.num_nontemp_gpr();
514 unsigned min_reg
= end_reg
;
516 unsigned i
, pass
= reg_pinned
? 0 : 1;
534 // cycle on swizzle combinations
536 for (i
= 0; i
< cnt
; ++i
) {
537 if (ch
[i
]->flags
& RCF_PIN_CHAN
)
538 if (ch
[i
]->pin
.chan() != swz
[i
])
544 // looking for minimal reg number such that the constrained chunks
545 // may be colored with the current swizzle combination
546 for (unsigned reg
= rs
; reg
< min_reg
; ++reg
) {
547 for (i
= 0; i
< cnt
; ++i
) {
548 unsigned bit
= sel_chan(reg
, swz
[i
]);
549 if (bit
< rb
[i
].size() && rb
[i
].get(bit
))
555 std::copy(swz
, swz
+ 4, min_swz
);
560 if (pass
== 0 && done
)
563 } while (std::next_permutation(swz
, swz
+ 4));
566 sblog
<< "sb: ra_coalesce - out of registers\n";
570 if (pass
== 0 && done
)
579 sblog
<< "min reg = " << min_reg
<< " min_swz = "
580 << min_swz
[0] << min_swz
[1] << min_swz
[2] << min_swz
[3] << "\n";
583 for (i
= 0; i
< cnt
; ++i
) {
584 sel_chan
color(min_reg
, min_swz
[i
]);
585 ra_chunk
*cc
= ch
[i
];
587 if (cc
->is_fixed()) {
588 if (cc
->pin
!= color
)
589 cc
= detach_value(cv
[i
]);
594 color_chunk(cc
, color
);
602 int coalescer::color_constraints() {
605 for (constraint_queue::iterator I
= constraints
.begin(),
606 E
= constraints
.end(); I
!= E
; ++I
) {
608 ra_constraint
*c
= *I
;
611 sblog
<< "color_constraints: ";
615 if (c
->kind
== CK_SAME_REG
) {
616 if ((r
= color_reg_constraint(c
)))
618 } else if (c
->kind
== CK_PHI
)
619 color_phi_constraint(c
);
624 } // namespace r600_sb