/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
30 #define VT_DUMP(q) do { q } while (0)
37 #include "sb_shader.h"
// Printable names for a channel index: the four GPR channels x/y/z/w,
// followed by '0', '1', '?' and '_' encodings used by the printer.
static const char *chans = "xyzw01?_";
// Stream pretty-printer for a value: writes a short mnemonic depending
// on the value kind (special register, GPR, kcache constant, literal,
// param, temp, ...), then the SSA version.
// NOTE(review): this chunk is missing many original lines (the embedded
// line numbers skip); the enclosing switch on v.kind and several
// case/brace lines are not visible here.
44 sb_ostream
& operator << (sb_ostream
&o
, value
&v
) {
// Dead flag — presumably used by missing code to bracket the output.
46 bool dead
= v
.flags
& VLF_DEAD
;
// Special (non-GPR) registers print symbolic names.
52 case VLK_SPECIAL_REG
: {
53 switch (v
.select
.sel()) {
54 case SV_AR_INDEX
: o
<< "AR"; break;
55 case SV_ALU_PRED
: o
<< "PR"; break;
56 case SV_EXEC_MASK
: o
<< "EM"; break;
57 case SV_VALID_MASK
: o
<< "VM"; break;
58 case SV_GEOMETRY_EMIT
: o
<< "GEOMETRY_EMIT"; break;
59 default: o
<< "???specialreg"; break;
// GPR: "R<sel>.<chan>" using the chans lookup string.
65 o
<< "R" << v
.select
.sel() << "."
66 << chans
[v
.select
.chan()];
// Kcache constant: "C<sel>.<chan>".
70 o
<< "C" << v
.select
.sel() << "." << chans
[v
.select
.chan()];
// Literal: float form, then '|', then the raw bits as 8 hex digits.
74 o
<< v
.literal_value
.f
<< "|";
75 o
.print_zw_hex(v
.literal_value
.u
, 8);
// Kernel parameter: index is biased by ALU_SRC_PARAM_OFFSET.
78 o
<< "Param" << (v
.select
.sel() - ALU_SRC_PARAM_OFFSET
)
79 << chans
[v
.select
.chan()];
// Temp: "t<n>" with the temp-register bias removed.
82 o
<< "t" << v
.select
.sel() - shader::temp_regid_offset
;
// Fallback for unknown kinds.
98 o
<< v
.kind
<< "?????";
// SSA version suffix.
103 o
<< "." << v
.version
;
// Allocated-GPR annotation "@R<sel>.<chan>"; `g` is bound on a missing
// line above — presumably the value's assigned gpr. TODO confirm.
124 o
<< "@R" << g
.sel() << "." << chans
[g
.chan()];
// GVN insertion: hash the value, try to constant-fold its defining
// expression, then scan the hash bucket for a structurally equal value
// and share its gvn_source; otherwise the value is recorded as new.
// NOTE(review): several original lines are missing from this chunk
// (bucket insertion, loop-body binding of `c`/`n`, VT_DUMP guards).
130 void value_table::add_value(value
* v
) {
137 sblog
<< "gvn add_value ";
141 value_hash hash
= v
->hash();
// Bucket selection: hash masked down to the table size.
142 vt_item
& vti
= hashtable
[hash
& size_mask
];
// A value with a defining node that folds to a constant is handled here
// instead of being matched against the bucket contents.
146 if (v
->def
&& ex
.try_fold(v
)) {
148 sblog
<< " folded: ";
149 dump::dump_val(v
->gvn_source
);
// Walk the bucket; `n` (position) and `c` (candidate value) come from
// lines missing in this chunk.
156 for (vt_item::iterator I
= vti
.begin(), E
= vti
.end(); I
!= E
; ++I
, ++n
) {
162 if (expr_equal(c
, v
)) {
// Equivalent value found: reuse its canonical gvn_source.
163 v
->gvn_source
= c
->gvn_source
;
166 sblog
<< " found : equal to ";
167 dump::dump_val(v
->gvn_source
);
176 sblog
<< " added new\n";
// Hash of this value for GVN bucketing.
// NOTE(review): surrounding lines are missing; the visible fragment
// suggests a cached hash (ghash) seeded from the object address with
// the low bit forced on (never 0) — confirm against the full file.
180 value_hash
value::hash() {
188 ghash
= ((uintptr_t)this) | 1;
// Hash contribution of the relative-addressing operand: 0 when there is
// no rel value. NOTE(review): the rest of the function is missing here.
193 value_hash
value::rel_hash() {
194 value_hash h
= rel
? rel
->hash() : 0;
200 bool value_table::expr_equal(value
* l
, value
* r
) {
201 return ex
.equal(l
, r
);
// Copy every value stored in the hash table into the caller's vector.
// NOTE(review): the lines that size the output vector and close the
// loop are missing from this chunk.
204 void value_table::get_values(vvec
& v
) {
207 vvec::iterator T
= v
.begin();
209 for(vt_table::iterator I
= hashtable
.begin(), E
= hashtable
.end();
// Append each bucket's contents; T tracks the write position.
211 T
= std::copy(I
->begin(), I
->end(), T
);
// Record node n as a user of this value.
// NOTE(review): the line that actually appends to the uselist is
// missing from this chunk; only the debug dump remains visible.
215 void value::add_use(node
* n
) {
218 dump::dump_val(this);
// Unary predicate for std::find_if: matches nodes whose hash equals the
// hash of the node captured at construction (used by value::remove_use).
// NOTE(review): the declaration of the stored node pointer member is on
// lines missing from this chunk.
225 struct use_node_comp
{
226 explicit use_node_comp(const node
*n
) : n(n
) {}
227 bool operator() (const node
*o
) {
228 return o
->hash() == n
->hash();
// Remove the first recorded use whose hash matches n, if any.
// NOTE(review): the erase call that should follow the find is on a line
// missing from this chunk.
235 void value::remove_use(const node
*n
) {
236 uselist::iterator it
=
237 std::find_if(uses
.begin(), uses
.end(), use_node_comp(n
));
239 if (it
!= uses
.end())
241 // We only ever had a pointer, so don't delete it here
// Number of uses currently recorded for this value.
// NOTE(review): the function body is on lines missing from this chunk.
246 unsigned value::use_count() {
// Global if the allocation chunk says so, otherwise if VLF_GLOBAL is
// set. NOTE(review): the `if (chunk)` guard implied by the two return
// statements is on a missing line — confirm against the full file.
250 bool value::is_global() {
252 return chunk
->is_global();
253 return flags
& VLF_GLOBAL
;
// Mark this value as global.
// NOTE(review): the function body is on lines missing from this chunk.
256 void value::set_global() {
// Mark this value preallocated; propagated to its chunk as well.
// NOTE(review): the guard around the chunk call is on a missing line.
263 void value::set_prealloc() {
265 flags
|= VLF_PREALLOC
;
267 chunk
->set_prealloc();
// Fixed when its array has a GPR assigned, when its chunk is fixed, or
// when VLF_FIXED is set.
// NOTE(review): the `return true;` bodies of the two guards are on
// lines missing from this chunk — confirm against the full file.
270 bool value::is_fixed() {
271 if (array
&& array
->gpr
)
273 if (chunk
&& chunk
->is_fixed())
275 return flags
& VLF_FIXED
;
// Prealloc status: delegated to the chunk when present, else the flag.
// NOTE(review): the `if (chunk)` guard is on a missing line — confirm.
284 bool value::is_prealloc() {
286 return chunk
->is_prealloc();
287 return flags
& VLF_PREALLOC
;
290 void value::delete_uses() {
291 // We only ever had pointers, so don't delete them here
292 uses
.erase(uses
.begin(), uses
.end());
295 void ra_constraint::update_values() {
296 for (vvec::iterator I
= values
.begin(), E
= values
.end(); I
!= E
; ++I
) {
297 assert(!(*I
)->constraint
);
298 (*I
)->constraint
= this;
// Bump allocator: round sz up to SB_POOL_ALIGN and carve it from the
// current block, starting a new malloc'd block when it does not fit.
// NOTE(review): the lines that advance total_size and fix up the offset
// on the new-block path are missing from this chunk.
302 void* sb_pool::allocate(unsigned sz
) {
303 sz
= (sz
+ SB_POOL_ALIGN
- 1) & ~(SB_POOL_ALIGN
- 1);
304 assert (sz
< (block_size
>> 6) && "too big allocation size for sb_pool");
306 unsigned offset
= total_size
% block_size
;
307 unsigned capacity
= block_size
* blocks
.size();
// Doesn't fit in the current block: skip to the block boundary and
// allocate a fresh block.
309 if (total_size
+ sz
> capacity
) {
310 total_size
= capacity
;
311 void * nb
= malloc(block_size
);
312 blocks
.push_back(nb
);
317 return ((char*)blocks
.back() + offset
);
// Release every block owned by the pool.
// NOTE(review): the loop body (presumably free(*I)) and the lines that
// reset the container and total size are missing from this chunk.
320 void sb_pool::free_all() {
321 for (block_vector::iterator I
= blocks
.begin(), E
= blocks
.end(); I
!= E
;
// Placement-construct a new value inside the pool.
// NOTE(review): the tail of the parameter list (the `ver` argument) and
// the return statement are on lines missing from this chunk.
327 value
* sb_value_pool::create(value_kind k
, sel_chan regid
,
329 void* np
= allocate(aligned_elt_size
);
// uid comes from size(): values are numbered in creation order.
330 value
*v
= new (np
) value(size(), k
, regid
, ver
);
// Run the destructor of every value created from the pool, walking each
// block in aligned_elt_size strides and stopping once total_size bytes
// have been visited. Storage itself is reclaimed elsewhere (free_all).
// NOTE(review): the early-return and closing-brace lines are missing
// from this chunk.
334 void sb_value_pool::delete_all() {
335 unsigned bcnt
= blocks
.size();
336 unsigned toffset
= 0;
337 for (unsigned b
= 0; b
< bcnt
; ++b
) {
338 char *bstart
= (char*)blocks
[b
];
339 for (unsigned offset
= 0; offset
< block_size
;
340 offset
+= aligned_elt_size
) {
// Explicit destructor call on the placement-constructed value.
341 ((value
*)(bstart
+ offset
))->~value();
342 toffset
+= aligned_elt_size
;
343 if (toffset
>= total_size
)
349 bool sb_bitset::get(unsigned id
) {
350 assert(id
< bit_size
);
351 unsigned w
= id
/ bt_bits
;
352 unsigned b
= id
% bt_bits
;
353 return (data
[w
] >> b
) & 1;
// Write bit `id` to the given state.
// NOTE(review): the resize on word overflow and the branch that
// distinguishes setting (|=) from clearing are on lines missing from
// this chunk — only the clear path is visible below.
356 void sb_bitset::set(unsigned id
, bool bit
) {
357 assert(id
< bit_size
);
358 unsigned w
= id
/ bt_bits
;
359 unsigned b
= id
% bt_bits
;
360 if (w
>= data
.size())
366 data
[w
] &= ~(1 << b
);
// Set bit `id` to `bit` and report whether the stored word changed.
// NOTE(review): the line defining `r` (presumably the old-vs-new word
// comparison) and the return are on lines missing from this chunk.
369 inline bool sb_bitset::set_chk(unsigned id
, bool bit
) {
370 assert(id
< bit_size
);
371 unsigned w
= id
/ bt_bits
;
372 unsigned b
= id
% bt_bits
;
373 basetype d
= data
[w
];
// New word value: clear the bit, then OR in the requested state.
374 basetype dn
= (d
& ~(1 << b
)) | (bit
<< b
);
376 data
[w
] = r
? dn
: data
[w
];
380 void sb_bitset::clear() {
381 std::fill(data
.begin(), data
.end(), 0);
// Grow or shrink the bitset to `size` bits, clearing any stale bits of
// the last pre-existing word that lie beyond the old bit_size.
// NOTE(review): the line that updates bit_size itself is missing from
// this chunk.
384 void sb_bitset::resize(unsigned size
) {
385 unsigned cur_data_size
= data
.size();
386 unsigned new_data_size
= (size
+ bt_bits
- 1) / bt_bits
;
389 if (new_data_size
!= cur_data_size
)
390 data
.resize(new_data_size
);
392 // make sure that new bits in the existing word are cleared
393 if (cur_data_size
&& size
> bit_size
&& bit_size
% bt_bits
) {
// clear_mask has 1s above the old bit_size boundary; &= ~clear_mask
// keeps only the previously valid low bits of the word.
394 basetype clear_mask
= (~(basetype
)0u) << (bit_size
% bt_bits
);
395 data
[cur_data_size
- 1] &= ~clear_mask
;
// Find the first set bit at or after `start`; the visible fragment
// counts trailing zeros of the shifted word with __builtin_ctz
// (GCC/Clang builtin, undefined for a zero argument).
// NOTE(review): the word-scanning loop, the zero-word guard, and the
// not-found return value are on lines missing from this chunk.
401 unsigned sb_bitset::find_bit(unsigned start
) {
402 assert(start
< bit_size
);
403 unsigned w
= start
/ bt_bits
;
404 unsigned b
= start
% bt_bits
;
405 unsigned sz
= data
.size();
408 basetype d
= data
[w
] >> b
;
410 unsigned pos
= __builtin_ctz(d
) + b
+ w
* bt_bits
;
421 sb_value_set::iterator::iterator(shader
& sh
, sb_value_set
* s
, unsigned nb
)
422 : vp(sh
.get_value_pool()), s(s
), nb(nb
) {}
// Union s2 into this set, reporting whether anything changed: build the
// OR of both bitsets and (in missing code) compare/swap it with bs.
// NOTE(review): the comparison, swap, and return are on lines missing
// from this chunk.
424 bool sb_value_set::add_set_checked(sb_value_set
& s2
) {
425 if (bs
.size() < s2
.bs
.size())
426 bs
.resize(s2
.bs
.size());
427 sb_bitset nbs
= bs
| s2
.bs
;
// Remove every value of s2 from this set.
// NOTE(review): the body is on lines missing from this chunk —
// presumably it masks bs with s2.bs (see sb_bitset::mask below).
435 void r600_sb::sb_value_set::remove_set(sb_value_set
& s2
) {
// Add value v to the set (values are tracked as bit uid-1); the bitset
// is grown with some slack when uid is beyond the current size.
// Returns true if the bit actually changed.
// NOTE(review): at least one line (likely an assert on v) is missing.
439 bool sb_value_set::add_val(value
* v
) {
441 if (bs
.size() < v
->uid
)
442 bs
.resize(v
->uid
+ 32);
444 return bs
.set_chk(v
->uid
- 1, 1);
// Remove every value of vv from the set; returns whether the set
// changed. NOTE(review): a guard on *I (vvec slots may be null
// elsewhere in this file — confirm) and the final return are on lines
// missing from this chunk.
447 bool sb_value_set::remove_vec(vvec
& vv
) {
448 bool modified
= false;
449 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
451 modified
|= remove_val(*I
);
// Empty the set.
// NOTE(review): the body is on lines missing from this chunk.
456 void sb_value_set::clear() {
// Clear the bit for v (uid-1) and report whether it changed.
// NOTE(review): the branch taken when the bitset is too small (and its
// return) is on lines missing from this chunk.
460 bool sb_value_set::remove_val(value
* v
) {
462 if (bs
.size() < v
->uid
)
464 return bs
.set_chk(v
->uid
- 1, 0);
// Add every value of vv to the set; returns whether the set changed.
// NOTE(review): the lines binding `v` from *I (and any null guard),
// plus the final return, are missing from this chunk.
467 bool r600_sb::sb_value_set::add_vec(vvec
& vv
) {
468 bool modified
= false;
469 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
472 modified
|= add_val(v
);
// Membership test via bit uid-1.
// NOTE(review): the bounds check against bs.size() and the return
// statements are on lines missing from this chunk.
477 bool r600_sb::sb_value_set::contains(value
* v
) {
478 unsigned b
= v
->uid
- 1;
485 bool sb_value_set::empty() {
486 return bs
.size() == 0 || bs
.find_bit(0) == bs
.size();
489 void sb_bitset::swap(sb_bitset
& bs2
) {
490 std::swap(data
, bs2
.data
);
491 std::swap(bit_size
, bs2
.bit_size
);
// Equality: bit counts must match and every stored word must compare
// equal. NOTE(review): the `return false;` / `return true;` lines are
// missing from this chunk.
494 bool sb_bitset::operator ==(const sb_bitset
& bs2
) {
495 if (bit_size
!= bs2
.bit_size
)
498 for (unsigned i
= 0, c
= data
.size(); i
< c
; ++i
) {
499 if (data
[i
] != bs2
.data
[i
])
// In-place intersection. When this set is wider than bs2 it is first
// resized down to bs2's width; then the common words are ANDed.
// NOTE(review): the loop-header continuation and the `return *this;`
// line are missing from this chunk.
505 sb_bitset
& sb_bitset::operator &=(const sb_bitset
& bs2
) {
506 if (bit_size
> bs2
.bit_size
) {
507 resize(bs2
.bit_size
);
510 for (unsigned i
= 0, c
= std::min(data
.size(), bs2
.data
.size()); i
< c
;
512 data
[i
] &= bs2
.data
[i
];
// Clear every bit of this set that is set in bs2 (this &= ~bs2),
// growing this set first when bs2 is wider.
// NOTE(review): the loop-header continuation and the return line are
// missing from this chunk.
517 sb_bitset
& sb_bitset::mask(const sb_bitset
& bs2
) {
518 if (bit_size
< bs2
.bit_size
) {
519 resize(bs2
.bit_size
);
522 for (unsigned i
= 0, c
= data
.size(); i
< c
;
524 data
[i
] &= ~bs2
.data
[i
];
// Validate a CK_SAME_REG constraint: every value must sit in the same
// GPR sel, and chan-pinned values must occupy their pinned channel.
// NOTE(review): large parts of this function (the binding of `v`, the
// `return false;` branches, and the final return) are on lines missing
// from this chunk — the summary above is inferred from the fragments.
529 bool ra_constraint::check() {
530 assert(kind
== CK_SAME_REG
);
534 for (vvec::iterator I
= values
.begin(), E
= values
.end(); I
!= E
; ++I
) {
// First value seen establishes the register; stored biased by +1,
// presumably so 0 can mean "not yet set" — confirm.
543 reg
= v
->gpr
.sel() + 1;
544 else if (reg
!= v
->gpr
.sel() + 1)
547 if (v
->is_chan_pinned()) {
548 if (v
->pin_gpr
.chan() != v
->gpr
.chan())
// Liveness query for a GPR array.
// NOTE(review): the function body is on lines missing from this chunk;
// the line below closes the enclosing r600_sb namespace.
555 bool gpr_array::is_dead() {
559 } // namespace r600_sb