2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
30 #define RA_DUMP(q) do { q } while (0)
35 #include "sb_shader.h"
// Entry point of the ra_coalesce pass.
// NOTE(review): the body of this definition is not visible in this chunk
// (source appears truncated) — presumably it constructs/runs a coalescer
// over the shader; see coalescer::run() below. TODO confirm in full file.
42 int ra_coalesce::run() {
// Records an affinity edge between two values: placing a and b in the same
// chunk would save a copy whose weight is 'cost'.
46 void coalescer::add_edge(value
* a
, value
* b
, unsigned cost
) {
// Affinity edges are only tracked between special-GPR values.
47 assert(a
->is_sgpr() && b
->is_sgpr());
// 'edges' owns the heap-allocated ra_edge objects (presumably freed in
// ~coalescer(), which iterates the edge queue — TODO confirm, the delete
// is on lines missing from this view).
// NOTE(review): the end of this function is missing from this view.
48 edges
.insert(new ra_edge(a
,b
, cost
));
// Wraps a single value into a fresh ra_chunk and registers the chunk in
// all_chunks. The chunk inherits the value's channel/register pinning.
// NOTE(review): several interior lines of this function are missing from
// this view (source appears truncated).
51 void coalescer::create_chunk(value
*v
) {
55 ra_chunk
*c
= new ra_chunk();
57 c
->values
.push_back(v
);
// Propagate the value's pinning constraints onto the new chunk.
59 if (v
->is_chan_pinned())
60 c
->flags
|= RCF_PIN_CHAN
;
61 if (v
->is_reg_pinned()) {
62 c
->flags
|= RCF_PIN_REG
;
// Debug trace (surrounding dump code is partially missing in this view).
68 cerr
<< "create_chunk: ";
72 all_chunks
.push_back(c
);
// Merges the chunk of the edge's b-value (c2) into the chunk of its
// a-value (c1): pin flags are combined, c2's values are appended to c1,
// c2 is looked up in all_chunks for removal, and the realized edge cost
// is folded into c1->cost.
77 void coalescer::unify_chunks(ra_edge
*e
) {
78 ra_chunk
*c1
= e
->a
->chunk
, *c2
= e
->b
->chunk
;
// Debug trace.
81 cerr
<< "unify_chunks: ";
// If only c2 is channel-pinned, c1 adopts c2's channel (keeping its own
// register sel).
86 if (c2
->is_chan_pinned() && !c1
->is_chan_pinned()) {
87 c1
->flags
|= RCF_PIN_CHAN
;
88 c1
->pin
= sel_chan(c1
->pin
.sel(), c2
->pin
.chan());
// If only c2 is register-pinned, c1 adopts c2's register (keeping its own
// channel).
91 if (c2
->is_reg_pinned() && !c1
->is_reg_pinned()) {
92 c1
->flags
|= RCF_PIN_REG
;
93 c1
->pin
= sel_chan(c2
->pin
.sel(), c1
->pin
.chan());
// Append c2's values to c1; reserve up front to avoid reallocations.
96 c1
->values
.reserve(c1
->values
.size() + c2
->values
.size());
98 for (vvec::iterator I
= c2
->values
.begin(), E
= c2
->values
.end(); I
!= E
;
101 c1
->values
.push_back(*I
);
// Locate c2 in the global chunk list so it can be unregistered.
// NOTE(review): the erase/delete of c2 is on lines missing from this view.
104 chunk_vec::iterator F
= std::find(all_chunks
.begin(), all_chunks
.end(), c2
);
105 assert(F
!= all_chunks
.end());
// Coalescing realizes the edge's saving; account it in the merged chunk.
109 c1
->cost
+= c2
->cost
+ e
->cost
;
// Returns whether two chunks cannot be coalesced: either their shared
// pinning constraints conflict, or some pair of their values interferes.
// NOTE(review): the return statements are on lines missing from this view.
113 bool coalescer::chunks_interference(ra_chunk
*c1
, ra_chunk
*c2
) {
// Only pin kinds carried by BOTH chunks can actually conflict.
114 unsigned pin_flags
= (c1
->flags
& c2
->flags
) &
115 (RCF_PIN_CHAN
| RCF_PIN_REG
);
// Both channel-pinned, but to different channels.
117 if ((pin_flags
& RCF_PIN_CHAN
) &&
118 c1
->pin
.chan() != c2
->pin
.chan())
// Both register-pinned, but to different registers.
121 if ((pin_flags
& RCF_PIN_REG
) &&
122 c1
->pin
.sel() != c2
->pin
.sel())
// Pairwise check over the two value sets (O(|c1| * |c2|)).
125 for (vvec::iterator I
= c1
->values
.begin(), E
= c1
->values
.end(); I
!= E
;
129 for (vvec::iterator I
= c2
->values
.begin(), E
= c2
->values
.end(); I
!= E
;
// Distinct values whose interference sets overlap block the merge.
133 if (!v1
->v_equal(v2
) && v1
->interferences
.contains(v2
))
// Walks the affinity edges (in queue order) and unifies the endpoint
// chunks whenever they do not interfere.
// NOTE(review): most of the loop body (chunk creation for endpoints
// without chunks, the unify call) is on lines missing from this view.
140 void coalescer::build_chunks() {
142 for (edge_queue::iterator I
= edges
.begin(), E
= edges
.end();
153 ra_chunk
*c1
= e
->a
->chunk
, *c2
= e
->b
->chunk
;
157 } else if (!chunks_interference(c1
, c2
))
// Allocates a new constraint of the given kind and registers it in
// all_constraints, which owns the allocation (see ~coalescer()).
// NOTE(review): the return statement is on a line missing from this view.
162 ra_constraint
* coalescer::create_constraint(constraint_kind kind
) {
163 ra_constraint
*c
= new ra_constraint(kind
);
164 all_constraints
.push_back(c
);
// Debug helper: prints every affinity edge — both endpoint values and the
// edge cost — to cerr.
168 void coalescer::dump_edges() {
169 cerr
<< "######## affinity edges\n";
171 for (edge_queue::iterator I
= edges
.begin(), E
= edges
.end();
175 dump::dump_val(e
->a
);
177 dump::dump_val(e
->b
);
178 cerr
<< " cost = " << e
->cost
<< "\n";
// Debug helper: prints all chunks in all_chunks (presumably via
// dump_chunk(); the loop body is on lines missing from this view).
182 void coalescer::dump_chunks() {
183 cerr
<< "######## chunks\n";
185 for (chunk_vec::iterator I
= all_chunks
.begin(), E
= all_chunks
.end();
// Debug helper: prints the queued constraints (presumably via
// dump_constraint(); the tail of the loop body is missing from this view).
193 void coalescer::dump_constraint_queue() {
194 cerr
<< "######## constraints\n";
196 for (constraint_queue::iterator I
= constraints
.begin(),
197 E
= constraints
.end(); I
!= E
; ++I
) {
198 ra_constraint
* c
= *I
;
// Debug helper: prints one chunk — its cost, its values, any REG/CHAN
// pinning, and whether it is GLOBAL.
203 void coalescer::dump_chunk(ra_chunk
* c
) {
204 cerr
<< " ra_chunk cost = " << c
->cost
<< " : ";
205 dump::dump_vec(c
->values
);
207 if (c
->flags
& RCF_PIN_REG
)
208 cerr
<< " REG = " << c
->pin
.sel();
210 if (c
->flags
& RCF_PIN_CHAN
)
211 cerr
<< " CHAN = " << c
->pin
.chan();
213 cerr
<< (c
->flags
& RCF_GLOBAL
? " GLOBAL" : "");
// Debug helper: prints one constraint — its kind, cost, and values.
// NOTE(review): the switch header on the kind is on a line missing from
// this view.
218 void coalescer::dump_constraint(ra_constraint
* c
) {
219 cerr
<< " ra_constraint: ";
221 case CK_PACKED_BS
: cerr
<< "PACKED_BS"; break;
222 case CK_PHI
: cerr
<< "PHI"; break;
223 case CK_SAME_REG
: cerr
<< "SAME_REG"; break;
// Unknown kinds are a programming error.
224 default: cerr
<< "UNKNOWN_KIND"; assert(0); break;
227 cerr
<< " cost = " << c
->cost
<< " : ";
228 dump::dump_vec(c
->values
);
// Collects into s the union of the interference sets of all values in
// chunk c, then removes c's own values — a chunk does not interfere with
// itself.
233 void coalescer::get_chunk_interferences(ra_chunk
*c
, val_set
&s
) {
235 for (vvec::iterator I
= c
->values
.begin(), E
= c
->values
.end(); I
!= E
;
238 s
.add_set(v
->interferences
);
240 s
.remove_vec(c
->values
);
// Fills the coloring queue 'chunks' from all_chunks.
// NOTE(review): the entire loop body (any filtering and the actual insert)
// is on lines missing from this view — TODO confirm in the full file.
243 void coalescer::build_chunk_queue() {
244 for (chunk_vec::iterator I
= all_chunks
.begin(),
245 E
= all_chunks
.end(); I
!= E
; ++I
) {
// Selects which constraints take part in constrained coloring: only
// CK_SAME_REG constraints over SGPR values are queued, weighted by a cost
// accumulated from the member values' chunk costs.
253 void coalescer::build_constraint_queue() {
254 for (constraint_vec::iterator I
= all_constraints
.begin(),
255 E
= all_constraints
.end(); I
!= E
; ++I
) {
256 ra_constraint
*c
= *I
;
// Skip empty constraints and constraints over non-SGPR values.
259 if (c
->values
.empty() || !c
->values
.front()->is_sgpr())
// Only same-register constraints are colored through this queue.
262 if (c
->kind
!= CK_SAME_REG
)
// Accumulate the constraint's weight from its members' chunk costs.
// (The 'cost' declaration and the continue statements are on lines
// missing from this view.)
265 for (vvec::iterator I
= c
->values
.begin(), E
= c
->values
.end();
271 cost
+= v
->chunk
->cost
;
274 constraints
.insert(c
);
// Assigns a register/channel color to every queued chunk that is not
// already fixed: gather the chunk's interferences into a bitset, then
// scan (reg, chan) slots — honoring any REG/CHAN pinning — for the first
// free one.
// NOTE(review): several lines (the pass loop header, the rs/rb/color
// declarations) are missing from this view.
278 void coalescer::color_chunks() {
280 for (chunk_queue::iterator I
= chunks
.begin(), E
= chunks
.end();
// Already-fixed or single-value chunks need no coloring work here.
283 if (c
->is_fixed() || c
->values
.size() == 1)
289 get_chunk_interferences(c
, interf
);
292 cerr
<< "color_chunks: ";
294 cerr
<< "\n interferences: ";
295 dump::dump_set(sh
,interf
);
// rb marks the (reg, chan) slots already taken by interfering values.
299 init_reg_bitset(rb
, interf
);
// Pass 0 tries to honor a register pin; unpinned chunks start at pass 1.
301 unsigned pass
= c
->is_reg_pinned() ? 0 : 1;
// A channel-pinned chunk scans exactly one channel; others scan all 4.
303 unsigned cs
= c
->is_chan_pinned() ? c
->pin
.chan() : 0;
304 unsigned ce
= c
->is_chan_pinned() ? cs
+ 1 : 4;
317 re
= sh
.num_nontemp_gpr();
// Scan registers x channels for a slot not blocked in rb.
320 for (unsigned reg
= rs
; reg
< re
; ++reg
) {
321 for (unsigned chan
= cs
; chan
< ce
; ++chan
) {
322 unsigned bit
= sel_chan(reg
, chan
);
// A bit beyond the set's size means no interference was recorded there.
323 if (bit
>= rb
.size() || !rb
.get(bit
)) {
339 color_chunk(c
, color
);
// Marks in bs the (reg, chan) slots already occupied by the GPR values in
// vs, so callers can find a free slot by scanning for clear bits.
// NOTE(review): the bitset resize/set operations and the skip paths are
// on lines missing from this view.
343 void coalescer::init_reg_bitset(sb_bitset
&bs
, val_set
&vs
) {
345 for (val_set::iterator I
= vs
.begin(sh
), E
= vs
.end(sh
); I
!= E
; ++I
) {
// Only GPR values occupy register slots.
348 if (!v
->is_any_gpr())
351 unsigned gpr
= v
->get_final_gpr();
356 if (gpr
>= bs
.size())
// Applies 'color' to every value of chunk c. Values whose own pinning
// disagrees with the chosen color get special handling (those branch
// bodies are on lines missing from this view), as do values under a PHI
// constraint.
363 void coalescer::color_chunk(ra_chunk
*c
, sel_chan color
) {
367 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
;
// Value is pinned to a different register than the chosen color.
371 if (v
->is_reg_pinned() && v
->pin_gpr
.sel() != color
.sel()) {
// Value is pinned to a different channel than the chosen color.
376 if (v
->is_chan_pinned() && v
->pin_gpr
.chan() != color
.chan()) {
// PHI-constrained values get extra treatment (body missing in this view).
383 if (v
->constraint
&& v
->constraint
->kind
== CK_PHI
)
388 cerr
<< " assigned " << color
<< " to ";
396 if (c
->is_reg_pinned()) {
// Releases the heap-allocated constraints, chunks, and affinity edges
// registered during the pass.
// NOTE(review): the delete statements themselves are on lines missing
// from this view.
401 coalescer::~coalescer() {
403 // FIXME use pool allocator ??
405 for (constraint_vec::iterator I
= all_constraints
.begin(),
406 E
= all_constraints
.end(); I
!= E
; ++I
) {
410 for (chunk_vec::iterator I
= all_chunks
.begin(),
411 E
= all_chunks
.end(); I
!= E
; ++I
) {
415 for (edge_queue::iterator I
= edges
.begin(), E
= edges
.end();
// Coalescer driver: dumps/builds chunks, builds the constraint queue,
// then colors the constraints, propagating any error code from
// color_constraints(). RA_DUMP wraps the debug dumps (see the macro near
// the top of the file).
// NOTE(review): the build_chunks()/build_chunk_queue()/color_chunks()
// calls and the return paths are on lines missing from this view — TODO
// confirm the exact ordering in the full file.
421 int coalescer::run() {
424 RA_DUMP( dump_edges(); );
427 RA_DUMP( dump_chunks(); );
429 build_constraint_queue();
430 RA_DUMP( dump_constraint_queue(); );
432 if ((r
= color_constraints()))
// Colors the values of a CK_PHI constraint (called from
// color_constraints()).
// NOTE(review): the body is not visible in this chunk (source truncated).
441 void coalescer::color_phi_constraint(ra_constraint
* c
) {
// Removes v from its current chunk; based on the callers (which assign the
// result and assert it is unpinned), v presumably ends up in a fresh chunk
// that is returned — TODO confirm, the new-chunk creation and return are
// on lines missing from this view.
444 ra_chunk
* coalescer::detach_value(value
*v
) {
// v must currently be a member of its chunk.
446 vvec::iterator F
= std::find(v
->chunk
->values
.begin(),
447 v
->chunk
->values
.end(), v
);
449 assert(F
!= v
->chunk
->values
.end());
450 v
->chunk
->values
.erase(F
);
// Register-pinned values get special handling (body missing in this view).
453 if (v
->is_reg_pinned()) {
// Debug trace of the chunk v was detached from.
458 cerr
<< " detached : ";
459 dump_chunk(v
->chunk
);
// Colors a CK_SAME_REG constraint: all member values must land in the
// same register, one per distinct channel. Iterates swizzle permutations
// (std::next_permutation over {0,1,2,3}) looking for the minimal register
// number at which every member's interference bitset leaves its slot
// free; members whose current chunk pinning conflicts are detached first.
// NOTE(review): numerous lines (the pass do/while header, rs setup, the
// ch/interf/rb/min_swz declarations, error return) are missing from this
// view.
466 int coalescer::color_reg_constraint(ra_constraint
*c
) {
467 unsigned k
, cnt
= c
->values
.size();
468 vvec
& cv
= c
->values
;
// Identity swizzle as the starting permutation.
471 unsigned swz
[4] = {0, 1, 2, 3};
475 bool reg_pinned
= false;
476 unsigned pin_reg
= ~0;
// Bitmask of channels already claimed by chan-pinned member chunks.
478 unsigned chan_mask
= 0;
481 for (vvec::iterator I
= cv
.begin(), E
= cv
.end(); I
!= E
; ++I
, ++k
) {
489 if (v
->chunk
->is_chan_pinned()) {
490 unsigned chan
= 1 << v
->chunk
->pin
.chan();
492 if (chan
& chan_mask
) { // channel already in use
// Conflicting channel pin: split this value out of its chunk.
493 ch
[k
] = detach_value(v
);
494 assert(!ch
[k
]->is_chan_pinned());
500 if (v
->chunk
->is_reg_pinned()) {
503 pin_reg
= v
->chunk
->pin
.sel();
// Per-member interference bitsets consulted by the register scan below.
507 get_chunk_interferences(ch
[k
], interf
[k
]);
508 init_reg_bitset(rb
[k
], interf
[k
]);
511 unsigned start_reg
, end_reg
;
514 end_reg
= sh
.num_nontemp_gpr();
516 unsigned min_reg
= end_reg
;
// Pass 0 tries to honor a register pin; otherwise start at pass 1.
518 unsigned i
, pass
= reg_pinned
? 0 : 1;
536 // cycle on swizzle combinations
538 for (i
= 0; i
< cnt
; ++i
) {
// Reject permutations that violate a member's channel pin.
539 if (ch
[i
]->flags
& RCF_PIN_CHAN
)
540 if (ch
[i
]->pin
.chan() != swz
[i
])
546 // looking for minimal reg number such that the constrained chunks
547 // may be colored with the current swizzle combination
548 for (unsigned reg
= rs
; reg
< min_reg
; ++reg
) {
549 for (i
= 0; i
< cnt
; ++i
) {
550 unsigned bit
= sel_chan(reg
, swz
[i
]);
// Slot occupied by an interfering value — this register won't work.
551 if (bit
< rb
[i
].size() && rb
[i
].get(bit
))
// Remember the best (lowest-register) swizzle found so far.
557 std::copy(swz
, swz
+ 4, min_swz
);
562 if (pass
== 0 && done
)
565 } while (std::next_permutation(swz
, swz
+ 4));
// No register satisfied the constraint: allocation failure.
568 cerr
<< "sb: ra_coalesce - out of registers\n";
572 if (pass
== 0 && done
)
// Debug trace of the chosen register and swizzle.
581 cerr
<< "min reg = " << min_reg
<< " min_swz = "
582 << min_swz
[0] << min_swz
[1] << min_swz
[2] << min_swz
[3] << "\n";
// Apply the chosen register/swizzle to every member chunk.
585 for (i
= 0; i
< cnt
; ++i
) {
586 sel_chan
color(min_reg
, min_swz
[i
]);
587 ra_chunk
*cc
= ch
[i
];
// A fixed chunk pinned to a different color forces the value out of it.
589 if (cc
->is_fixed()) {
590 if (cc
->pin
!= color
)
591 cc
= detach_value(cv
[i
]);
596 color_chunk(cc
, color
);
// Processes the constraint queue in order: CK_SAME_REG constraints go
// through color_reg_constraint (propagating its error code on failure),
// CK_PHI constraints through color_phi_constraint.
// NOTE(review): the 'r' declaration and the return statements are on
// lines missing from this view.
603 int coalescer::color_constraints() {
606 for (constraint_queue::iterator I
= constraints
.begin(),
607 E
= constraints
.end(); I
!= E
; ++I
) {
609 ra_constraint
*c
= *I
;
// Debug trace.
612 cerr
<< "color_constraints: ";
616 if (c
->kind
== CK_SAME_REG
) {
617 if ((r
= color_reg_constraint(c
)))
619 } else if (c
->kind
== CK_PHI
)
620 color_phi_constraint(c
);
625 } // namespace r600_sb