2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
// Wrapper for value-table debug-dump statements: expands its argument
// inside do/while(0) so the macro behaves as a single statement.
// NOTE(review): the surrounding #if that presumably gates this on a
// dump flag is not visible in this fragment -- confirm before editing.
30 #define VT_DUMP(q) do { q } while (0)
#include <algorithm>

37 #include "sb_shader.h"
// Channel-name characters indexed by sel_chan::chan(): x/y/z/w for GPR
// channels, '0'/'1' for constants, '?' and '_' for unknown/unused.
42 static const char * chans
= "xyzw01?_";
// Stream-output operator for value: prints a human-readable name
// depending on the value kind (special registers, GPRs, kcache
// constants, literals, interpolated params, temporaries), followed by
// the SSA version and, for relative values, the "@R<sel>.<chan>" base.
// NOTE(review): this extraction is fragmentary -- the enclosing
// switch on v.kind, several case labels and closing braces were lost
// (original lines 45, 47-51, 64-68, 71-73, ... elided). Do not edit
// structure here without consulting the complete file.
44 sb_ostream
& operator << (sb_ostream
&o
, value
&v
) {
// Liveness flag; how 'dead' is printed is not visible in this fragment.
46 bool dead
= v
.flags
& VLF_DEAD
;
// Special (non-allocatable) registers are printed by symbolic name,
// dispatched on the select value.
52 case VLK_SPECIAL_REG
: {
53 switch (v
.select
.sel()) {
54 case SV_AR_INDEX
: o
<< "AR"; break;
55 case SV_ALU_PRED
: o
<< "PR"; break;
56 case SV_EXEC_MASK
: o
<< "EM"; break;
57 case SV_VALID_MASK
: o
<< "VM"; break;
58 case SV_GEOMETRY_EMIT
: o
<< "GEOMETRY_EMIT"; break;
59 case SV_LDS_RW
: o
<< "LDS_RW"; break;
60 case SV_LDS_OQA
: o
<< "LDS_OQA"; break;
61 case SV_LDS_OQB
: o
<< "LDS_OQB"; break;
62 case SV_SCRATCH
: o
<< "SCRATCH"; break;
63 default: o
<< "???specialreg"; break;
// GPR: "R<sel>.<chan-letter>".
69 o
<< "R" << v
.select
.sel() << "."
70 << chans
[v
.select
.chan()];
// Kcache/constant: "C<sel>.<chan-letter>".
74 o
<< "C" << v
.select
.sel() << "." << chans
[v
.select
.chan()];
// Literal: float interpretation, then the raw 32-bit hex pattern.
78 o
<< v
.literal_value
.f
<< "|";
79 o
.print_zw_hex(v
.literal_value
.u
, 8);
// Interpolated parameter: index is biased by ALU_SRC_PARAM_OFFSET.
82 o
<< "Param" << (v
.select
.sel() - ALU_SRC_PARAM_OFFSET
)
83 << chans
[v
.select
.chan()];
// Temporary: "t<index>" relative to the temp-register base.
86 o
<< "t" << v
.select
.sel() - shader::temp_regid_offset
;
// Fallback for unknown kinds.
102 o
<< v
.kind
<< "?????";
// SSA version suffix ".<version>".
107 o
<< "." << v
.version
;
// Relative-addressing base, printed as "@R<sel>.<chan>"; 'g' is
// presumably the gvalue of the rel base -- its declaration is elided.
128 o
<< "@R" << g
.sel() << "." << chans
[g
.chan()];
// Global value numbering: registers v in the hash table. Constant
// expressions are folded first (expr_handler::try_fold); if an equal
// expression already exists in the bucket, v->gvn_source is redirected
// to the existing value's source, otherwise v is recorded as new.
// NOTE(review): fragmentary -- early-out checks, the bucket insert and
// several dump statements are elided (original lines 135-140, 147-149,
// 154-159, 161-165, 168-179).
134 void value_table::add_value(value
* v
) {
141 sblog
<< "gvn add_value ";
// Bucket index = hash masked to table size (size_mask).
145 value_hash hash
= v
->hash();
146 vt_item
& vti
= hashtable
[hash
& size_mask
];
// Defined values may fold to a constant before numbering.
150 if (v
->def
&& ex
.try_fold(v
)) {
152 sblog
<< " folded: ";
153 dump::dump_val(v
->gvn_source
);
// Linear scan of the bucket for a structurally equal expression.
160 for (vt_item::iterator I
= vti
.begin(), E
= vti
.end(); I
!= E
; ++I
, ++n
) {
166 if (expr_equal(c
, v
)) {
// Share the GVN source with the matching entry.
167 v
->gvn_source
= c
->gvn_source
;
170 sblog
<< " found : equal to ";
171 dump::dump_val(v
->gvn_source
);
180 sblog
<< " added new\n";
// Computes (and presumably caches in ghash) the hash of this value.
// NOTE(review): most of the body is elided (original lines 185-191);
// the visible fallback hashes the object address, |1 so the result is
// never 0 (0 likely means "not yet hashed").
184 value_hash
value::hash() {
192 ghash
= ((uintptr_t)this) | 1;
// Hash of the relative-addressing base: the rel value's hash, or 0
// when this value is not relative. Combination with the base hash is
// elided (original lines 199+).
197 value_hash
value::rel_hash() {
198 value_hash h
= rel
? rel
->hash() : 0;
204 bool value_table::expr_equal(value
* l
, value
* r
) {
205 return ex
.equal(l
, r
);
// Appends every value stored in the hash table to v by copying each
// bucket in turn. NOTE(review): fragmentary -- the pre-sizing of v and
// the loop condition/increment are elided (original lines 209-212,
// 214); T presumably points past any pre-existing elements of v.
208 void value_table::get_values(vvec
& v
) {
211 vvec::iterator T
= v
.begin();
213 for(vt_table::iterator I
= hashtable
.begin(), E
= hashtable
.end();
215 T
= std::copy(I
->begin(), I
->end(), T
);
// Registers n as a user of this value. The actual list insertion is
// elided from this fragment (original lines 220-228); only the debug
// dump of the value is visible.
219 void value::add_use(node
* n
) {
222 dump::dump_val(this);
// Predicate used with std::find_if to locate a use entry that refers
// to a given node, matching by node hash. The member declaration of
// 'n' and the closing brace are elided (original lines 233-236).
229 struct use_node_comp
{
230 explicit use_node_comp(const node
*n
) : n(n
) {}
231 bool operator() (const node
*o
) {
232 return o
->hash() == n
->hash();
// Removes n from this value's use list, if present. The erase call
// itself is elided (original line 244); the uses list does not own the
// node, so the node must not be deleted here.
239 void value::remove_use(const node
*n
) {
240 uselist::iterator it
=
241 std::find_if(uses
.begin(), uses
.end(), use_node_comp(n
));
243 if (it
!= uses
.end())
245 // We only ever had a pointer, so don't delete it here
// Number of current uses of this value; body elided in this fragment
// (presumably the size of the uses list -- confirm in the full file).
250 unsigned value::use_count() {
254 bool value::is_global() {
256 return chunk
->is_global();
257 return flags
& VLF_GLOBAL
;
// Marks this value as global; body elided in this fragment (presumably
// sets VLF_GLOBAL and propagates to the chunk -- confirm in full file).
260 void value::set_global() {
267 void value::set_prealloc() {
269 flags
|= VLF_PREALLOC
;
271 chunk
->set_prealloc();
274 bool value::is_fixed() {
275 if (array
&& array
->gpr
)
277 if (chunk
&& chunk
->is_fixed())
279 return flags
& VLF_FIXED
;
288 bool value::is_prealloc() {
290 return chunk
->is_prealloc();
291 return flags
& VLF_PREALLOC
;
294 void value::delete_uses() {
295 // We only ever had pointers, so don't delete them here
296 uses
.erase(uses
.begin(), uses
.end());
// Returns true when copy-propagating 'src' into this value cannot
// introduce a relative-addressing conflict: every use of this value is
// checked for another relative source/destination whose address base
// differs from src's. NOTE(review): fragmentary -- the early returns
// (non-relative case, AR destination case) and the conflict 'return
// false' bodies are elided (original lines 300-304, 308-312, 318,
// 321-323, 325, 328-331).
299 bool value::no_reladdr_conflict_with(value
*src
)
301 /* if the register is not relative, it can't create a relative access conflict */
305 /* If the destination is AR then we accept the copy propagation, because the
306 * scheduler actually re-creates the address loading operation and will
307 * signal interference if there is an address register load and it will fail
310 if (gvalue()->is_AR())
313 /* For all nodes that use this value test whether the operation uses another
314 * relative access with a different address value. If found, signal conflict.
316 for (uselist::const_iterator N
= uses
.begin(); N
!= uses
.end(); ++N
) {
// Check every source operand of the using node.
317 for (vvec::const_iterator V
= (*N
)->src
.begin(); V
!= (*N
)->src
.end(); ++V
) {
319 value
*v
= (*V
)->gvalue();
320 if (v
!= src
&& v
->is_rel() && v
->rel
!= src
->rel
)
// Check every destination operand as well (may be null).
324 for (vvec::const_iterator V
= (*N
)->dst
.begin(); V
!= (*N
)->dst
.end(); ++V
) {
326 value
*v
= (*V
)->gvalue();
327 if (v
&& v
!= src
&& v
->is_rel() && (v
->rel
!= src
->rel
))
335 void ra_constraint::update_values() {
336 for (vvec::iterator I
= values
.begin(), E
= values
.end(); I
!= E
; ++I
) {
337 assert(!(*I
)->constraint
);
338 (*I
)->constraint
= this;
// Bump allocator: rounds sz up to SB_POOL_ALIGN and returns a pointer
// inside the current malloc'd block, appending a fresh block when the
// request does not fit. NOTE(review): fragmentary -- the lines that
// reset 'offset' after a new block and advance total_size by sz are
// elided (original lines 353-356); the malloc result is not visibly
// checked for NULL.
342 void* sb_pool::allocate(unsigned sz
) {
// Round the request up to the pool alignment.
343 sz
= (sz
+ SB_POOL_ALIGN
- 1) & ~(SB_POOL_ALIGN
- 1);
// Allocations must be small relative to the block size so a request
// always fits in a single block.
344 assert (sz
< (block_size
>> 6) && "too big allocation size for sb_pool");
345 unsigned offset
= total_size
% block_size
;
346 unsigned capacity
= block_size
* blocks
.size();
// Start a new block when the remainder of the current one is too small.
349 if (total_size
+ sz
> capacity
) {
350 total_size
= capacity
;
351 void * nb
= malloc(block_size
);
352 blocks
.push_back(nb
);
357 return ((char*)blocks
.back() + offset
);
// Releases every block owned by the pool. The loop body (presumably
// free(*I)) and the container/size reset are elided from this fragment
// (original lines 362-365).
360 void sb_pool::free_all() {
361 for (block_vector::iterator I
= blocks
.begin(), E
= blocks
.end(); I
!= E
;
// Allocates storage from the pool and placement-constructs a value in
// it; size() at construction time presumably becomes the value's uid.
// NOTE(review): the parameter list continuation (with 'ver') and the
// return statement are elided (original lines 368, 371-372).
367 value
* sb_value_pool::create(value_kind k
, sel_chan regid
,
369 void* np
= allocate(aligned_elt_size
);
370 value
*v
= new (np
) value(size(), k
, regid
, ver
);
// Explicitly destroys every placement-constructed value in every pool
// block, walking blocks in aligned_elt_size strides and stopping once
// toffset reaches the live total_size. NOTE(review): the body of the
// final 'if' (presumably 'return;') and the closing braces are elided
// (original lines 384-386).
374 void sb_value_pool::delete_all() {
375 unsigned bcnt
= blocks
.size();
// Running offset across all blocks, compared against total_size.
376 unsigned toffset
= 0;
377 for (unsigned b
= 0; b
< bcnt
; ++b
) {
378 char *bstart
= (char*)blocks
[b
];
379 for (unsigned offset
= 0; offset
< block_size
;
380 offset
+= aligned_elt_size
) {
// Placement new in create() requires a manual destructor call here.
381 ((value
*)(bstart
+ offset
))->~value();
382 toffset
+= aligned_elt_size
;
383 if (toffset
>= total_size
)
389 bool sb_bitset::get(unsigned id
) {
390 assert(id
< bit_size
);
391 unsigned w
= id
/ bt_bits
;
392 unsigned b
= id
% bt_bits
;
393 return (data
[w
] >> b
) & 1;
// Writes bit 'id' to the given state. NOTE(review): fragmentary -- the
// body of the resize guard and the branch that sets the bit are elided
// (original lines 401-405, 407-408); only the clearing store is
// visible.
396 void sb_bitset::set(unsigned id
, bool bit
) {
397 assert(id
< bit_size
);
// Word index and bit position within the word.
398 unsigned w
= id
/ bt_bits
;
399 unsigned b
= id
% bt_bits
;
// Grow storage when the word is beyond current capacity (body elided).
400 if (w
>= data
.size())
// Clear path: mask the target bit out of its word.
406 data
[w
] &= ~(1 << b
);
409 inline bool sb_bitset::set_chk(unsigned id
, bool bit
) {
410 assert(id
< bit_size
);
411 unsigned w
= id
/ bt_bits
;
412 unsigned b
= id
% bt_bits
;
413 basetype d
= data
[w
];
414 basetype dn
= (d
& ~(1 << b
)) | (bit
<< b
);
416 data
[w
] = r
? dn
: data
[w
];
420 void sb_bitset::clear() {
421 std::fill(data
.begin(), data
.end(), 0);
// Resizes the set to 'size' bits, growing/shrinking the word vector
// and clearing any stale bits in the last previously-used word when
// growing. NOTE(review): the final update of bit_size itself is elided
// (original lines 436-438) -- without it the function is incomplete.
424 void sb_bitset::resize(unsigned size
) {
425 unsigned cur_data_size
= data
.size();
// Number of words needed to hold 'size' bits, rounded up.
426 unsigned new_data_size
= (size
+ bt_bits
- 1) / bt_bits
;
429 if (new_data_size
!= cur_data_size
)
430 data
.resize(new_data_size
);
432 // make sure that new bits in the existing word are cleared
433 if (cur_data_size
&& size
> bit_size
&& bit_size
% bt_bits
) {
// Mask of the bit positions at and above the old bit_size boundary.
434 basetype clear_mask
= (~(basetype
)0u) << (bit_size
% bt_bits
);
435 data
[cur_data_size
- 1] &= ~clear_mask
;
// Finds the first set bit at position >= start. The visible code
// computes the position with __builtin_ctz when the (shifted) current
// word is non-zero; the loop over subsequent words and the
// no-bit-found return (which callers compare against size()) are
// elided (original lines 446-447, 449, 451-460).
441 unsigned sb_bitset::find_bit(unsigned start
) {
442 assert(start
< bit_size
);
443 unsigned w
= start
/ bt_bits
;
444 unsigned b
= start
% bt_bits
;
445 unsigned sz
= data
.size();
// Current word with bits below 'start' shifted out.
448 basetype d
= data
[w
] >> b
;
// __builtin_ctz is undefined for 0, so d must be non-zero here
// (presumably guarded by an elided check).
450 unsigned pos
= __builtin_ctz(d
) + b
+ w
* bt_bits
;
// Iterator over a value set: remembers the shader's value pool (uids
// index into it), the set being iterated, and the current bit 'nb'.
461 sb_value_set::iterator::iterator(shader
& sh
, sb_value_set
* s
, unsigned nb
)
462 : vp(sh
.get_value_pool()), s(s
), nb(nb
) {}
// Unions s2 into this set and reports whether anything changed: grows
// our bitset to cover s2, computes the OR into a temporary, then
// (in elided lines 468-472) presumably compares/swaps it in and
// returns the changed flag -- confirm in the full file.
464 bool sb_value_set::add_set_checked(sb_value_set
& s2
) {
465 if (bs
.size() < s2
.bs
.size())
466 bs
.resize(s2
.bs
.size());
467 sb_bitset nbs
= bs
| s2
.bs
;
// Removes every value of s2 from this set; the body is elided
// (presumably bs.mask(s2.bs) -- confirm in the full file).
475 void r600_sb::sb_value_set::remove_set(sb_value_set
& s2
) {
479 bool sb_value_set::add_val(value
* v
) {
481 if (bs
.size() < v
->uid
)
482 bs
.resize(v
->uid
+ 32);
484 return bs
.set_chk(v
->uid
- 1, 1);
487 bool sb_value_set::remove_vec(vvec
& vv
) {
488 bool modified
= false;
489 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
491 modified
|= remove_val(*I
);
// Empties the set; body elided (presumably bs.clear() -- confirm in
// the full file).
496 void sb_value_set::clear() {
500 bool sb_value_set::remove_val(value
* v
) {
502 if (bs
.size() < v
->uid
)
504 return bs
.set_chk(v
->uid
- 1, 0);
507 bool r600_sb::sb_value_set::add_vec(vvec
& vv
) {
508 bool modified
= false;
509 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
512 modified
|= add_val(v
);
// Membership test: maps the 1-based uid to its 0-based bit. The bound
// check against bs.size() and the bs.get(b)/false returns are elided
// (original lines 519-523).
517 bool r600_sb::sb_value_set::contains(value
* v
) {
518 unsigned b
= v
->uid
- 1;
525 bool sb_value_set::empty() {
526 return bs
.size() == 0 || bs
.find_bit(0) == bs
.size();
529 void sb_bitset::swap(sb_bitset
& bs2
) {
530 std::swap(data
, bs2
.data
);
531 std::swap(bit_size
, bs2
.bit_size
);
534 bool sb_bitset::operator ==(const sb_bitset
& bs2
) {
535 if (bit_size
!= bs2
.bit_size
)
538 for (unsigned i
= 0, c
= data
.size(); i
< c
; ++i
) {
539 if (data
[i
] != bs2
.data
[i
])
545 sb_bitset
& sb_bitset::operator &=(const sb_bitset
& bs2
) {
546 if (bit_size
> bs2
.bit_size
) {
547 resize(bs2
.bit_size
);
550 for (unsigned i
= 0, c
= std::min(data
.size(), bs2
.data
.size()); i
< c
;
552 data
[i
] &= bs2
.data
[i
];
557 sb_bitset
& sb_bitset::mask(const sb_bitset
& bs2
) {
558 if (bit_size
< bs2
.bit_size
) {
559 resize(bs2
.bit_size
);
562 for (unsigned i
= 0, c
= data
.size(); i
< c
;
564 data
[i
] &= ~bs2
.data
[i
];
// Validates a CK_SAME_REG constraint: every (allocated) value must sit
// in the same GPR sel, and chan-pinned values must be on their pinned
// channel. NOTE(review): fragmentary -- the declaration of 'v'/'reg',
// the skip conditions, and the true/false returns are elided (original
// lines 571-582, 585-586, 589-594); 'reg' appears to use sel()+1 so 0
// can mean "not yet seen".
569 bool ra_constraint::check() {
// Only same-register constraints are checked here.
570 assert(kind
== CK_SAME_REG
);
574 for (vvec::iterator I
= values
.begin(), E
= values
.end(); I
!= E
; ++I
) {
// First allocated value establishes the register...
583 reg
= v
->gpr
.sel() + 1;
// ...and every later one must match it.
584 else if (reg
!= v
->gpr
.sel() + 1)
// Channel-pinned values must be allocated on the pinned channel.
587 if (v
->is_chan_pinned()) {
588 if (v
->pin_gpr
.chan() != v
->gpr
.chan())
// Whether the array has no live elements; body elided in this
// fragment (original lines 596+).
595 bool gpr_array::is_dead() {
599 } // namespace r600_sb