/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
30 #define PSC_DUMP(a) do { a } while (0)
36 #include "sb_shader.h"
39 #include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1
43 rp_kcache_tracker::rp_kcache_tracker(shader
&sh
) : rp(), uc(),
44 // FIXME: for now we'll use "two const pairs" limit for r600, same as
45 // for other chips, otherwise additional check in alu_group_tracker is
46 // required to make sure that all 4 consts in the group fit into 2
50 bool rp_kcache_tracker::try_reserve(sel_chan r
) {
51 unsigned sel
= kc_sel(r
);
53 for (unsigned i
= 0; i
< sel_count
; ++i
) {
67 bool rp_kcache_tracker::try_reserve(node
* n
) {
68 bool need_unreserve
= false;
69 vvec::iterator
I(n
->src
.begin()), E(n
->src
.end());
74 if (!try_reserve(v
->select
))
77 need_unreserve
= true;
83 if (need_unreserve
&& I
!= n
->src
.begin()) {
89 } while (I
!= n
->src
.begin());
95 void rp_kcache_tracker::unreserve(node
* n
) {
96 vvec::iterator
I(n
->src
.begin()), E(n
->src
.end());
100 unreserve(v
->select
);
104 void rp_kcache_tracker::unreserve(sel_chan r
) {
105 unsigned sel
= kc_sel(r
);
107 for (unsigned i
= 0; i
< sel_count
; ++i
)
117 bool literal_tracker::try_reserve(alu_node
* n
) {
118 bool need_unreserve
= false;
120 vvec::iterator
I(n
->src
.begin()), E(n
->src
.end());
122 for (; I
!= E
; ++I
) {
124 if (v
->is_literal()) {
125 if (!try_reserve(v
->literal_value
))
128 need_unreserve
= true;
134 if (need_unreserve
&& I
!= n
->src
.begin()) {
139 unreserve(v
->literal_value
);
140 } while (I
!= n
->src
.begin());
145 void literal_tracker::unreserve(alu_node
* n
) {
146 unsigned nsrc
= n
->bc
.op_ptr
->src_count
, i
;
148 for (i
= 0; i
< nsrc
; ++i
) {
149 value
*v
= n
->src
[i
];
151 unreserve(v
->literal_value
);
155 bool literal_tracker::try_reserve(literal l
) {
157 PSC_DUMP( sblog
<< "literal reserve " << l
.u
<< " " << l
.f
<< "\n"; );
159 for (unsigned i
= 0; i
< MAX_ALU_LITERALS
; ++i
) {
163 PSC_DUMP( sblog
<< " reserved new uc = " << uc
[i
] << "\n"; );
165 } else if (lt
[i
] == l
) {
167 PSC_DUMP( sblog
<< " reserved uc = " << uc
[i
] << "\n"; );
171 PSC_DUMP( sblog
<< " failed to reserve literal\n"; );
175 void literal_tracker::unreserve(literal l
) {
177 PSC_DUMP( sblog
<< "literal unreserve " << l
.u
<< " " << l
.f
<< "\n"; );
179 for (unsigned i
= 0; i
< MAX_ALU_LITERALS
; ++i
) {
190 static inline unsigned bs_cycle_vector(unsigned bs
, unsigned src
) {
191 static const unsigned swz
[VEC_NUM
][3] = {
192 {0, 1, 2}, {0, 2, 1}, {1, 2, 0}, {1, 0, 2}, {2, 0, 1}, {2, 1, 0}
194 assert(bs
< VEC_NUM
&& src
< 3);
198 static inline unsigned bs_cycle_scalar(unsigned bs
, unsigned src
) {
199 static const unsigned swz
[SCL_NUM
][3] = {
200 {2, 1, 0}, {1, 2, 2}, {2, 1, 2}, {2, 2, 1}
203 if (bs
>= SCL_NUM
|| src
>= 3) {
204 // this prevents gcc warning "array subscript is above array bounds"
205 // AFAICS we should never hit this path
211 static inline unsigned bs_cycle(bool trans
, unsigned bs
, unsigned src
) {
212 return trans
? bs_cycle_scalar(bs
, src
) : bs_cycle_vector(bs
, src
);
216 bool rp_gpr_tracker::try_reserve(unsigned cycle
, unsigned sel
, unsigned chan
) {
218 if (rp
[cycle
][chan
] == 0) {
219 rp
[cycle
][chan
] = sel
;
222 } else if (rp
[cycle
][chan
] == sel
) {
230 void rp_gpr_tracker::unreserve(alu_node
* n
) {
231 unsigned nsrc
= n
->bc
.op_ptr
->src_count
, i
;
232 unsigned trans
= n
->bc
.slot
== SLOT_TRANS
;
233 unsigned bs
= n
->bc
.bank_swizzle
;
234 unsigned opt
= !trans
235 && n
->bc
.src
[0].sel
== n
->bc
.src
[1].sel
236 && n
->bc
.src
[0].chan
== n
->bc
.src
[1].chan
;
238 for (i
= 0; i
< nsrc
; ++i
) {
239 value
*v
= n
->src
[i
];
240 if (v
->is_readonly() || v
->is_undef())
244 unsigned cycle
= bs_cycle(trans
, bs
, i
);
245 unreserve(cycle
, n
->bc
.src
[i
].sel
, n
->bc
.src
[i
].chan
);
250 void rp_gpr_tracker::unreserve(unsigned cycle
, unsigned sel
, unsigned chan
) {
252 assert(rp
[cycle
][chan
] == sel
&& uc
[cycle
][chan
]);
253 if (--uc
[cycle
][chan
] == 0)
258 bool rp_gpr_tracker::try_reserve(alu_node
* n
) {
259 unsigned nsrc
= n
->bc
.op_ptr
->src_count
, i
;
260 unsigned trans
= n
->bc
.slot
== SLOT_TRANS
;
261 unsigned bs
= n
->bc
.bank_swizzle
;
262 unsigned opt
= !trans
&& nsrc
>= 2 &&
263 n
->src
[0] == n
->src
[1];
265 bool need_unreserve
= false;
266 unsigned const_count
= 0, min_gpr_cycle
= 3;
268 for (i
= 0; i
< nsrc
; ++i
) {
269 value
*v
= n
->src
[i
];
270 if (v
->is_readonly() || v
->is_undef()) {
272 if (trans
&& const_count
== 3)
278 unsigned cycle
= bs_cycle(trans
, bs
, i
);
280 if (trans
&& cycle
< min_gpr_cycle
)
281 min_gpr_cycle
= cycle
;
283 if (const_count
&& cycle
< const_count
&& trans
)
286 if (!try_reserve(cycle
, n
->bc
.src
[i
].sel
, n
->bc
.src
[i
].chan
))
289 need_unreserve
= true;
293 if ((i
== nsrc
) && (min_gpr_cycle
+ 1 > const_count
))
296 if (need_unreserve
&& i
--) {
298 value
*v
= n
->src
[i
];
299 if (!v
->is_readonly() && !v
->is_undef()) {
302 unreserve(bs_cycle(trans
, bs
, i
), n
->bc
.src
[i
].sel
,
310 alu_group_tracker::alu_group_tracker(shader
&sh
)
312 gpr(), lt(), slots(),
313 max_slots(sh
.get_ctx().is_cayman() ? 4 : 5),
314 has_mova(), uses_ar(), has_predset(), has_kill(),
315 updates_exec_mask(), chan_count(), interp_param(), next_id() {
317 available_slots
= sh
.get_ctx().has_trans
? 0x1F : 0x0F;
321 sel_chan
alu_group_tracker::get_value_id(value
* v
) {
322 unsigned &id
= vmap
[v
];
325 return sel_chan(id
, v
->get_final_chan());
329 void alu_group_tracker::assign_slot(unsigned slot
, alu_node
* n
) {
332 available_slots
&= ~(1 << slot
);
334 unsigned param
= n
->interp_param();
337 assert(!interp_param
|| interp_param
== param
);
338 interp_param
= param
;
343 void alu_group_tracker::discard_all_slots(container_node
&removed_nodes
) {
344 PSC_DUMP( sblog
<< "agt::discard_all_slots\n"; );
345 discard_slots(~available_slots
& ((1 << max_slots
) - 1), removed_nodes
);
348 void alu_group_tracker::discard_slots(unsigned slot_mask
,
349 container_node
&removed_nodes
) {
352 sblog
<< "discard_slots : packed_ops : "
353 << (unsigned)packed_ops
.size() << "\n";
356 for (node_vec::iterator N
, I
= packed_ops
.begin();
357 I
!= packed_ops
.end(); I
= N
) {
360 alu_packed_node
*n
= static_cast<alu_packed_node
*>(*I
);
361 unsigned pslots
= n
->get_slot_mask();
364 sblog
<< "discard_slots : packed slot_mask : " << pslots
<< "\n";
367 if (pslots
& slot_mask
) {
370 sblog
<< "discard_slots : discarding packed...\n";
373 removed_nodes
.push_back(n
);
374 slot_mask
&= ~pslots
;
375 N
= packed_ops
.erase(I
);
376 available_slots
|= pslots
;
377 for (unsigned k
= 0; k
< max_slots
; ++k
) {
378 if (pslots
& (1 << k
))
384 for (unsigned slot
= 0; slot
< max_slots
; ++slot
) {
385 unsigned slot_bit
= 1 << slot
;
387 if (slot_mask
& slot_bit
) {
388 assert(!(available_slots
& slot_bit
));
391 assert(!(slots
[slot
]->bc
.slot_flags
& AF_4SLOT
));
394 sblog
<< "discarding slot " << slot
<< " : ";
395 dump::dump_op(slots
[slot
]);
399 removed_nodes
.push_back(slots
[slot
]);
401 available_slots
|= slot_bit
;
405 alu_node
*t
= slots
[4];
406 if (t
&& (t
->bc
.slot_flags
& AF_V
)) {
407 unsigned chan
= t
->bc
.dst_chan
;
412 sblog
<< " from trans slot to free slot " << chan
<< "\n";
424 alu_group_node
* alu_group_tracker::emit() {
426 alu_group_node
*g
= sh
.create_alu_group();
428 lt
.init_group_literals(g
);
430 for (unsigned i
= 0; i
< max_slots
; ++i
) {
431 alu_node
*n
= slots
[i
];
439 bool alu_group_tracker::try_reserve(alu_node
* n
) {
440 unsigned nsrc
= n
->bc
.op_ptr
->src_count
;
441 unsigned slot
= n
->bc
.slot
;
442 bool trans
= slot
== 4;
447 unsigned flags
= n
->bc
.op_ptr
->flags
;
449 unsigned param
= n
->interp_param();
451 if (param
&& interp_param
&& interp_param
!= param
)
454 if ((flags
& AF_KILL
) && has_predset
)
456 if ((flags
& AF_ANY_PRED
) && (has_kill
|| has_predset
))
458 if ((flags
& AF_MOVA
) && (has_mova
|| uses_ar
))
461 if (n
->uses_ar() && has_mova
)
464 for (unsigned i
= 0; i
< nsrc
; ++i
) {
466 unsigned last_id
= next_id
;
468 value
*v
= n
->src
[i
];
469 if (!v
->is_any_gpr() && !v
->is_rel())
471 sel_chan vid
= get_value_id(n
->src
[i
]);
473 if (vid
> last_id
&& chan_count
[vid
.chan()] == 3) {
477 n
->bc
.src
[i
].sel
= vid
.sel();
478 n
->bc
.src
[i
].chan
= vid
.chan();
481 if (!lt
.try_reserve(n
))
484 if (!kc
.try_reserve(n
)) {
489 unsigned fbs
= n
->forced_bank_swizzle();
491 n
->bc
.bank_swizzle
= 0;
494 n
->bc
.bank_swizzle
= VEC_210
;
496 if (gpr
.try_reserve(n
)) {
497 assign_slot(slot
, n
);
502 unsigned swz_num
= trans
? SCL_NUM
: VEC_NUM
;
503 for (unsigned bs
= 0; bs
< swz_num
; ++bs
) {
504 n
->bc
.bank_swizzle
= bs
;
505 if (gpr
.try_reserve(n
)) {
506 assign_slot(slot
, n
);
515 unsigned forced_swz_slots
= 0;
516 int first_slot
= ~0, first_nf
= ~0, last_slot
= ~0;
519 for (unsigned i
= 0; i
< max_slots
; ++i
) {
520 alu_node
*a
= slots
[i
];
522 if (first_slot
== ~0)
525 save_bs
[i
] = a
->bc
.bank_swizzle
;
526 if (a
->forced_bank_swizzle()) {
527 assert(i
!= SLOT_TRANS
);
528 forced_swz_slots
|= (1 << i
);
529 a
->bc
.bank_swizzle
= VEC_210
;
530 if (!gpr
.try_reserve(a
))
531 assert(!"internal reservation error");
536 a
->bc
.bank_swizzle
= 0;
541 if (first_nf
== ~0) {
542 assign_slot(slot
, n
);
546 assert(first_slot
!= ~0 && last_slot
!= ~0);
548 // silence "array subscript is above array bounds" with gcc 4.8
553 alu_node
*a
= slots
[i
];
554 bool backtrack
= false;
559 sblog
<< " bs: trying s" << i
<< " bs:" << a
->bc
.bank_swizzle
560 << " bt:" << backtrack
<< "\n";
563 if (!backtrack
&& gpr
.try_reserve(a
)) {
565 sblog
<< " bs: reserved s" << i
<< " bs:" << a
->bc
.bank_swizzle
569 while ((++i
<= last_slot
) && !slots
[i
]);
575 bool itrans
= i
== SLOT_TRANS
;
576 unsigned max_swz
= itrans
? SCL_221
: VEC_210
;
578 if (a
->bc
.bank_swizzle
< max_swz
) {
579 ++a
->bc
.bank_swizzle
;
582 sblog
<< " bs: inc s" << i
<< " bs:" << a
->bc
.bank_swizzle
588 a
->bc
.bank_swizzle
= 0;
589 while ((--i
>= first_nf
) && !slots
[i
]);
594 sblog
<< " bs: unreserve s" << i
<< " bs:" << a
->bc
.bank_swizzle
606 if (i
== last_slot
+ 1) {
607 assign_slot(slot
, n
);
611 // reservation failed, restore previous state
614 for (unsigned i
= 0; i
< max_slots
; ++i
) {
615 alu_node
*a
= slots
[i
];
617 a
->bc
.bank_swizzle
= save_bs
[i
];
618 bool b
= gpr
.try_reserve(a
);
628 bool alu_group_tracker::try_reserve(alu_packed_node
* p
) {
629 bool need_unreserve
= false;
630 node_iterator
I(p
->begin()), E(p
->end());
632 for (; I
!= E
; ++I
) {
633 alu_node
*n
= static_cast<alu_node
*>(*I
);
637 need_unreserve
= true;
641 packed_ops
.push_back(p
);
645 if (need_unreserve
) {
647 alu_node
*n
= static_cast<alu_node
*>(*I
);
648 slots
[n
->bc
.slot
] = NULL
;
655 void alu_group_tracker::reinit() {
657 memcpy(s
, slots
, sizeof(slots
));
661 for (int i
= max_slots
- 1; i
>= 0; --i
) {
662 if (s
[i
] && !try_reserve(s
[i
])) {
663 sblog
<< "alu_group_tracker: reinit error on slot " << i
<< "\n";
664 for (unsigned i
= 0; i
< max_slots
; ++i
) {
665 sblog
<< " slot " << i
<< " : ";
671 assert(!"alu_group_tracker: reinit error");
676 void alu_group_tracker::reset(bool keep_packed
) {
680 memset(slots
, 0, sizeof(slots
));
687 updates_exec_mask
= false;
688 available_slots
= sh
.get_ctx().has_trans
? 0x1F : 0x0F;
700 void alu_group_tracker::update_flags(alu_node
* n
) {
701 unsigned flags
= n
->bc
.op_ptr
->flags
;
702 has_kill
|= (flags
& AF_KILL
);
703 has_mova
|= (flags
& AF_MOVA
);
704 has_predset
|= (flags
& AF_ANY_PRED
);
705 uses_ar
|= n
->uses_ar();
707 if (flags
& AF_ANY_PRED
) {
708 if (n
->dst
[2] != NULL
)
709 updates_exec_mask
= true;
713 int post_scheduler::run() {
718 void post_scheduler::run_on(container_node
* n
) {
720 for (node_riterator I
= n
->rbegin(), E
= n
->rend(); I
!= E
; ++I
) {
721 if (I
->is_container()) {
722 if (I
->subtype
== NST_BB
) {
723 bb_node
* bb
= static_cast<bb_node
*>(*I
);
726 run_on(static_cast<container_node
*>(*I
));
732 void post_scheduler::init_uc_val(container_node
*c
, value
*v
) {
733 node
*d
= v
->any_def();
734 if (d
&& d
->parent
== c
)
738 void post_scheduler::init_uc_vec(container_node
*c
, vvec
&vv
, bool src
) {
739 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
741 if (!v
|| v
->is_readonly())
745 init_uc_val(c
, v
->rel
);
746 init_uc_vec(c
, v
->muse
, true);
753 unsigned post_scheduler::init_ucm(container_node
*c
, node
*n
) {
754 init_uc_vec(c
, n
->src
, true);
755 init_uc_vec(c
, n
->dst
, false);
757 uc_map::iterator F
= ucm
.find(n
);
758 return F
== ucm
.end() ? 0 : F
->second
;
761 void post_scheduler::schedule_bb(bb_node
* bb
) {
763 sblog
<< "scheduling BB " << bb
->id
<< "\n";
764 if (!pending
.empty())
765 dump::dump_op_list(&pending
);
768 assert(pending
.empty());
769 assert(bb_pending
.empty());
770 assert(ready
.empty());
772 bb_pending
.append_from(bb
);
777 while ((n
= bb_pending
.back())) {
780 sblog
<< "post_sched_bb ";
785 // May require emitting ALU ops to load index registers
786 if (n
->is_fetch_clause()) {
788 process_fetch(static_cast<container_node
*>(n
));
792 if (n
->is_alu_clause()) {
794 process_alu(static_cast<container_node
*>(n
));
805 void post_scheduler::init_regmap() {
810 sblog
<< "init_regmap: live: ";
811 dump::dump_set(sh
, live
);
815 for (val_set::iterator I
= live
.begin(sh
), E
= live
.end(sh
); I
!= E
; ++I
) {
818 if (!v
->is_sgpr() || !v
->is_prealloc())
824 sblog
<< "init_regmap: " << r
<< " <= ";
834 static alu_node
*create_set_idx(shader
&sh
, unsigned ar_idx
) {
835 alu_node
*a
= sh
.create_alu();
837 assert(ar_idx
== V_SQ_CF_INDEX_0
|| ar_idx
== V_SQ_CF_INDEX_1
);
838 if (ar_idx
== V_SQ_CF_INDEX_0
)
839 a
->bc
.set_op(ALU_OP0_SET_CF_IDX0
);
841 a
->bc
.set_op(ALU_OP0_SET_CF_IDX1
);
843 a
->dst
.resize(1); // Dummy needed for recolor
846 sblog
<< "created IDX load: ";
854 void post_scheduler::load_index_register(value
*v
, unsigned ar_idx
)
858 if (!sh
.get_ctx().is_cayman()) {
859 // Evergreen has to first load address register, then use CF_SET_IDX0/1
860 alu_group_tracker
&rt
= alu
.grp();
861 alu_node
*set_idx
= create_set_idx(sh
, ar_idx
);
862 if (!rt
.try_reserve(set_idx
)) {
863 sblog
<< "can't emit SET_CF_IDX";
864 dump::dump_op(set_idx
);
869 if (!alu
.check_clause_limits()) {
870 // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
875 alu_group_tracker
&rt
= alu
.grp();
876 alu_node
*a
= alu
.create_ar_load(v
, ar_idx
== V_SQ_CF_INDEX_1
? SEL_Z
: SEL_Y
);
878 if (!rt
.try_reserve(a
)) {
879 sblog
<< "can't emit AR load : ";
886 if (!alu
.check_clause_limits()) {
887 // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
891 alu
.emit_clause(cur_bb
);
894 void post_scheduler::process_fetch(container_node
*c
) {
898 for (node_iterator N
, I
= c
->begin(), E
= c
->end(); I
!= E
; I
= N
) {
904 fetch_node
*f
= static_cast<fetch_node
*>(n
);
907 sblog
<< "process_tex ";
912 // TODO: If same values used can avoid reloading index register
913 if (f
->bc
.sampler_index_mode
!= V_SQ_CF_INDEX_NONE
||
914 f
->bc
.resource_index_mode
!= V_SQ_CF_INDEX_NONE
) {
915 unsigned index_mode
= f
->bc
.sampler_index_mode
!= V_SQ_CF_INDEX_NONE
?
916 f
->bc
.sampler_index_mode
: f
->bc
.resource_index_mode
;
918 // Currently require prior opt passes to use one TEX per indexed op
919 assert(f
->parent
->count() == 1);
921 value
*v
= f
->src
.back(); // Last src is index offset
924 cur_bb
->push_front(c
);
926 load_index_register(v
, index_mode
);
927 f
->src
.pop_back(); // Don't need index value any more
933 cur_bb
->push_front(c
);
936 void post_scheduler::process_alu(container_node
*c
) {
944 live
= c
->live_after
;
946 init_globals(c
->live_after
, true);
947 init_globals(c
->live_before
, true);
951 update_local_interferences();
953 for (node_riterator N
, I
= c
->rbegin(), E
= c
->rend(); I
!= E
; I
= N
) {
958 unsigned uc
= init_ucm(c
, n
);
961 sblog
<< "process_alu uc=" << uc
<< " ";
969 pending
.push_back(n
);
970 PSC_DUMP( sblog
<< "pending\n"; );
979 void post_scheduler::update_local_interferences() {
982 sblog
<< "update_local_interferences : ";
983 dump::dump_set(sh
, live
);
988 for (val_set::iterator I
= live
.begin(sh
), E
= live
.end(sh
); I
!= E
; ++I
) {
990 if (v
->is_prealloc())
993 v
->interferences
.add_set(live
);
997 void post_scheduler::update_live_src_vec(vvec
&vv
, val_set
*born
, bool src
) {
998 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1004 if (src
&& v
->is_any_gpr()) {
1005 if (live
.add_val(v
)) {
1006 if (!v
->is_prealloc()) {
1007 if (!cleared_interf
.contains(v
)) {
1009 sblog
<< "clearing interferences for " << *v
<< "\n";
1011 v
->interferences
.clear();
1012 cleared_interf
.add_val(v
);
1018 } else if (v
->is_rel()) {
1019 if (!v
->rel
->is_any_gpr())
1020 live
.add_val(v
->rel
);
1021 update_live_src_vec(v
->muse
, born
, true);
1026 void post_scheduler::update_live_dst_vec(vvec
&vv
) {
1027 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1033 update_live_dst_vec(v
->mdef
);
1034 } else if (v
->is_any_gpr()) {
1035 if (!live
.remove_val(v
)) {
1037 sblog
<< "failed to remove ";
1039 sblog
<< " from live : ";
1040 dump::dump_set(sh
, live
);
1048 void post_scheduler::update_live(node
*n
, val_set
*born
) {
1049 update_live_dst_vec(n
->dst
);
1050 update_live_src_vec(n
->src
, born
, true);
1051 update_live_src_vec(n
->dst
, born
, false);
1054 void post_scheduler::process_group() {
1055 alu_group_tracker
&rt
= alu
.grp();
1062 sblog
<< "process_group: live_before : ";
1063 dump::dump_set(sh
, live
);
1067 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1068 alu_node
*n
= rt
.slot(s
);
1072 update_live(n
, &vals_born
);
1076 sblog
<< "process_group: live_after : ";
1077 dump::dump_set(sh
, live
);
1081 update_local_interferences();
1083 for (unsigned i
= 0; i
< 5; ++i
) {
1084 node
*n
= rt
.slot(i
);
1085 if (n
&& !n
->is_mova()) {
1086 release_src_values(n
);
1091 void post_scheduler::init_globals(val_set
&s
, bool prealloc
) {
1094 sblog
<< "init_globals: ";
1095 dump::dump_set(sh
, s
);
1099 for (val_set::iterator I
= s
.begin(sh
), E
= s
.end(sh
); I
!= E
; ++I
) {
1101 if (v
->is_sgpr() && !v
->is_global()) {
1104 if (prealloc
&& v
->is_fixed()) {
1111 void post_scheduler::emit_index_registers() {
1112 for (unsigned i
= 0; i
< 2; i
++) {
1113 if (alu
.current_idx
[i
]) {
1114 regmap
= prev_regmap
;
1115 alu
.discard_current_group();
1117 load_index_register(alu
.current_idx
[i
], KC_INDEX_0
+ i
);
1118 alu
.current_idx
[i
] = NULL
;
1123 void post_scheduler::emit_clause() {
1125 if (alu
.current_ar
) {
1131 if (!alu
.is_empty()) {
1132 alu
.emit_clause(cur_bb
);
1135 emit_index_registers();
1138 void post_scheduler::schedule_alu(container_node
*c
) {
1140 assert(!ready
.empty() || !ready_copies
.empty());
1144 prev_regmap
= regmap
;
1146 if (!prepare_alu_group()) {
1147 if (alu
.current_idx
[0] || alu
.current_idx
[1]) {
1148 regmap
= prev_regmap
;
1150 init_globals(live
, false);
1155 if (alu
.current_ar
) {
1162 if (!alu
.check_clause_limits()) {
1163 regmap
= prev_regmap
;
1165 init_globals(live
, false);
1174 if (!alu
.is_empty()) {
1178 if (!ready
.empty()) {
1179 sblog
<< "##post_scheduler: unscheduled ready instructions :";
1180 dump::dump_op_list(&ready
);
1181 assert(!"unscheduled ready instructions");
1184 if (!pending
.empty()) {
1185 sblog
<< "##post_scheduler: unscheduled pending instructions :";
1186 dump::dump_op_list(&pending
);
1187 assert(!"unscheduled pending instructions");
1191 void post_scheduler::add_interferences(value
*v
, sb_bitset
&rb
, val_set
&vs
) {
1192 unsigned chan
= v
->gpr
.chan();
1194 for (val_set::iterator I
= vs
.begin(sh
), E
= vs
.end(sh
);
1197 sel_chan gpr
= vi
->get_final_gpr();
1199 if (vi
->is_any_gpr() && gpr
&& vi
!= v
&&
1200 (!v
->chunk
|| v
->chunk
!= vi
->chunk
) &&
1201 vi
->is_fixed() && gpr
.chan() == chan
) {
1203 unsigned r
= gpr
.sel();
1206 sblog
<< "\tadd_interferences: " << *vi
<< "\n";
1216 void post_scheduler::set_color_local_val(value
*v
, sel_chan color
) {
1220 sblog
<< " recolored: ";
1226 void post_scheduler::set_color_local(value
*v
, sel_chan color
) {
1228 vvec
&vv
= v
->chunk
->values
;
1229 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1231 set_color_local_val(v2
, color
);
1235 set_color_local_val(v
, color
);
1240 bool post_scheduler::recolor_local(value
*v
) {
1244 assert(v
->is_sgpr());
1245 assert(!v
->is_prealloc());
1248 unsigned chan
= v
->gpr
.chan();
1251 sblog
<< "recolor_local: ";
1253 sblog
<< " interferences: ";
1254 dump::dump_set(sh
, v
->interferences
);
1257 sblog
<< " in chunk: ";
1258 coalescer::dump_chunk(v
->chunk
);
1264 for (vvec::iterator I
= v
->chunk
->values
.begin(),
1265 E
= v
->chunk
->values
.end(); I
!= E
; ++I
) {
1268 PSC_DUMP( sblog
<< " add_interferences for " << *v2
<< " :\n"; );
1270 add_interferences(v
, rb
, v2
->interferences
);
1273 add_interferences(v
, rb
, v
->interferences
);
1277 unsigned sz
= rb
.size();
1278 sblog
<< "registers bits: " << sz
;
1279 for (unsigned r
= 0; r
< sz
; ++r
) {
1281 sblog
<< "\n " << r
<< " ";
1282 sblog
<< (rb
.get(r
) ? 1 : 0);
1286 bool no_temp_gprs
= v
->is_global();
1287 unsigned rs
, re
, pass
= no_temp_gprs
? 1 : 0;
1292 rs
= sh
.first_temp_gpr();
1296 re
= sh
.num_nontemp_gpr();
1299 for (unsigned reg
= rs
; reg
< re
; ++reg
) {
1300 if (reg
>= rb
.size() || !rb
.get(reg
)) {
1302 set_color_local(v
, sel_chan(reg
, chan
));
1309 assert(!"recolor_local failed");
1313 void post_scheduler::emit_load_ar() {
1315 regmap
= prev_regmap
;
1316 alu
.discard_current_group();
1318 alu_group_tracker
&rt
= alu
.grp();
1319 alu_node
*a
= alu
.create_ar_load(alu
.current_ar
, SEL_X
);
1321 if (!rt
.try_reserve(a
)) {
1322 sblog
<< "can't emit AR load : ";
1330 bool post_scheduler::unmap_dst_val(value
*d
) {
1332 if (d
== alu
.current_ar
) {
1337 if (d
->is_prealloc()) {
1338 sel_chan gpr
= d
->get_final_gpr();
1339 rv_map::iterator F
= regmap
.find(gpr
);
1341 if (F
!= regmap
.end())
1344 if (c
&& c
!=d
&& (!c
->chunk
|| c
->chunk
!= d
->chunk
)) {
1346 sblog
<< "dst value conflict : ";
1348 sblog
<< " regmap contains ";
1352 assert(!"scheduler error");
1361 bool post_scheduler::unmap_dst(alu_node
*n
) {
1362 value
*d
= n
->dst
.empty() ? NULL
: n
->dst
[0];
1368 if (d
&& d
->is_any_reg()) {
1371 if (alu
.current_ar
!= d
) {
1372 sblog
<< "loading wrong ar value\n";
1375 alu
.current_ar
= NULL
;
1378 } else if (d
->is_any_gpr()) {
1379 if (!unmap_dst_val(d
))
1384 for (vvec::iterator I
= d
->mdef
.begin(), E
= d
->mdef
.end();
1390 assert(d
->is_any_gpr());
1392 if (!unmap_dst_val(d
))
1399 bool post_scheduler::map_src_val(value
*v
) {
1401 if (!v
->is_prealloc())
1404 sel_chan gpr
= v
->get_final_gpr();
1405 rv_map::iterator F
= regmap
.find(gpr
);
1407 if (F
!= regmap
.end()) {
1409 if (!v
->v_equal(c
)) {
1411 sblog
<< "can't map src value ";
1413 sblog
<< ", regmap contains ";
1420 regmap
.insert(std::make_pair(gpr
, v
));
1425 bool post_scheduler::map_src_vec(vvec
&vv
, bool src
) {
1427 // Handle possible UBO indexing
1428 bool ubo_indexing
[2] = { false, false };
1429 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1434 if (v
->is_kcache()) {
1435 unsigned index_mode
= v
->select
.kcache_index_mode();
1436 if (index_mode
== KC_INDEX_0
|| index_mode
== KC_INDEX_1
) {
1437 ubo_indexing
[index_mode
- KC_INDEX_0
] = true;
1442 // idx values stored at end of src vec, see bc_parser::prepare_alu_group
1443 for (unsigned i
= 2; i
!= 0; i
--) {
1444 if (ubo_indexing
[i
-1]) {
1445 // TODO: skip adding value to kcache reservation somehow, causes
1446 // unnecessary group breaks and cache line locks
1447 value
*v
= vv
.back();
1448 if (alu
.current_idx
[i
-1] && alu
.current_idx
[i
-1] != v
) {
1450 sblog
<< "IDX" << i
-1 << " already set to " <<
1451 *alu
.current_idx
[i
-1] << ", trying to set " << *v
<< "\n";
1456 alu
.current_idx
[i
-1] = v
;
1457 PSC_DUMP(sblog
<< "IDX" << i
-1 << " set to " << *v
<< "\n";);
1462 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1467 if ((!v
->is_any_gpr() || !v
->is_fixed()) && !v
->is_rel())
1471 value
*rel
= v
->rel
;
1474 if (!rel
->is_const()) {
1475 if (!map_src_vec(v
->muse
, true))
1478 if (rel
!= alu
.current_ar
) {
1479 if (alu
.current_ar
) {
1481 sblog
<< " current_AR is " << *alu
.current_ar
1482 << " trying to use " << *rel
<< "\n";
1487 alu
.current_ar
= rel
;
1490 sblog
<< " new current_AR assigned: " << *alu
.current_ar
1497 if (!map_src_val(v
)) {
1505 bool post_scheduler::map_src(alu_node
*n
) {
1506 if (!map_src_vec(n
->dst
, false))
1509 if (!map_src_vec(n
->src
, true))
1515 void post_scheduler::dump_regmap() {
1517 sblog
<< "# REGMAP :\n";
1519 for(rv_map::iterator I
= regmap
.begin(), E
= regmap
.end(); I
!= E
; ++I
) {
1520 sblog
<< " # " << I
->first
<< " => " << *(I
->second
) << "\n";
1524 sblog
<< " current_AR: " << *alu
.current_ar
<< "\n";
1526 sblog
<< " current_PR: " << *alu
.current_pr
<< "\n";
1527 if (alu
.current_idx
[0])
1528 sblog
<< " current IDX0: " << *alu
.current_idx
[0] << "\n";
1529 if (alu
.current_idx
[1])
1530 sblog
<< " current IDX1: " << *alu
.current_idx
[1] << "\n";
1533 void post_scheduler::recolor_locals() {
1534 alu_group_tracker
&rt
= alu
.grp();
1536 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1537 alu_node
*n
= rt
.slot(s
);
1539 value
*d
= n
->dst
[0];
1540 if (d
&& d
->is_sgpr() && !d
->is_prealloc()) {
1547 // returns true if there are interferences
1548 bool post_scheduler::check_interferences() {
1550 alu_group_tracker
&rt
= alu
.grp();
1552 unsigned interf_slots
;
1554 bool discarded
= false;
1557 sblog
<< "check_interferences: before: \n";
1565 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1566 alu_node
*n
= rt
.slot(s
);
1568 if (!unmap_dst(n
)) {
1574 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1575 alu_node
*n
= rt
.slot(s
);
1578 interf_slots
|= (1 << s
);
1584 for (unsigned i
= 0; i
< 5; ++i
) {
1585 if (interf_slots
& (1 << i
)) {
1586 sblog
<< "!!!!!! interf slot: " << i
<< " : ";
1587 dump::dump_op(rt
.slot(i
));
1596 PSC_DUMP( sblog
<< "ci: discarding slots " << interf_slots
<< "\n"; );
1598 rt
.discard_slots(interf_slots
, alu
.conflict_nodes
);
1599 regmap
= prev_regmap
;
1605 sblog
<< "check_interferences: after: \n";
1612 // add instruction(s) (alu_node or contents of alu_packed_node) to current group
1613 // returns the number of added instructions on success
1614 unsigned post_scheduler::try_add_instruction(node
*n
) {
1616 alu_group_tracker
&rt
= alu
.grp();
1618 unsigned avail_slots
= rt
.avail_slots();
1620 // Cannot schedule in same clause as instructions using this index value
1621 if (!n
->dst
.empty() && n
->dst
[0] &&
1622 (n
->dst
[0] == alu
.current_idx
[0] || n
->dst
[0] == alu
.current_idx
[1])) {
1623 PSC_DUMP(sblog
<< " CF_IDX source: " << *n
->dst
[0] << "\n";);
1627 if (n
->is_alu_packed()) {
1628 alu_packed_node
*p
= static_cast<alu_packed_node
*>(n
);
1629 unsigned slots
= p
->get_slot_mask();
1630 unsigned cnt
= __builtin_popcount(slots
);
1632 if ((slots
& avail_slots
) != slots
) {
1633 PSC_DUMP( sblog
<< " no slots \n"; );
1637 p
->update_packed_items(ctx
);
1639 if (!rt
.try_reserve(p
)) {
1640 PSC_DUMP( sblog
<< " reservation failed \n"; );
1648 alu_node
*a
= static_cast<alu_node
*>(n
);
1649 value
*d
= a
->dst
.empty() ? NULL
: a
->dst
[0];
1651 if (d
&& d
->is_special_reg()) {
1652 assert((a
->bc
.op_ptr
->flags
& AF_MOVA
) || d
->is_geometry_emit());
1656 unsigned allowed_slots
= ctx
.alu_slots_mask(a
->bc
.op_ptr
);
1659 allowed_slots
&= avail_slots
;
1665 slot
= d
->get_final_chan();
1666 a
->bc
.dst_chan
= slot
;
1667 allowed_slots
&= (1 << slot
) | 0x10;
1669 if (a
->bc
.op_ptr
->flags
& AF_MOVA
) {
1670 if (a
->bc
.slot_flags
& AF_V
)
1671 allowed_slots
&= (1 << SLOT_X
);
1673 allowed_slots
&= (1 << SLOT_TRANS
);
1677 // FIXME workaround for some problems with MULADD in trans slot on r700,
1678 // (is it really needed on r600?)
1679 if ((a
->bc
.op
== ALU_OP3_MULADD
|| a
->bc
.op
== ALU_OP3_MULADD_IEEE
) &&
1681 allowed_slots
&= 0x0F;
1684 if (!allowed_slots
) {
1685 PSC_DUMP( sblog
<< " no suitable slots\n"; );
1689 slot
= __builtin_ctz(allowed_slots
);
1692 PSC_DUMP( sblog
<< "slot: " << slot
<< "\n"; );
1694 if (!rt
.try_reserve(a
)) {
1695 PSC_DUMP( sblog
<< " reservation failed\n"; );
1704 bool post_scheduler::check_copy(node
*n
) {
1705 if (!n
->is_copy_mov())
1708 value
*s
= n
->src
[0];
1709 value
*d
= n
->dst
[0];
1711 if (!s
->is_sgpr() || !d
->is_sgpr())
1714 if (!s
->is_prealloc()) {
1717 if (!s
->chunk
|| s
->chunk
!= d
->chunk
)
1721 if (s
->gpr
== d
->gpr
) {
1724 sblog
<< "check_copy: ";
1729 rv_map::iterator F
= regmap
.find(d
->gpr
);
1730 bool gpr_free
= (F
== regmap
.end());
1732 if (d
->is_prealloc()) {
1734 PSC_DUMP( sblog
<< " copy not ready...\n";);
1738 value
*rv
= F
->second
;
1739 if (rv
!= d
&& (!rv
->chunk
|| rv
->chunk
!= d
->chunk
)) {
1740 PSC_DUMP( sblog
<< " copy not ready(2)...\n";);
1744 unmap_dst(static_cast<alu_node
*>(n
));
1747 if (s
->is_prealloc() && !map_src_val(s
))
1750 update_live(n
, NULL
);
1752 release_src_values(n
);
1754 PSC_DUMP( sblog
<< " copy coalesced...\n";);
1760 void post_scheduler::dump_group(alu_group_tracker
&rt
) {
1761 for (unsigned i
= 0; i
< 5; ++i
) {
1762 node
*n
= rt
.slot(i
);
1764 sblog
<< "slot " << i
<< " : ";
1771 void post_scheduler::process_ready_copies() {
1776 last
= ready_copies
.back();
1778 for (node_iterator N
, I
= ready_copies
.begin(), E
= ready_copies
.end();
1784 if (!check_copy(n
)) {
1789 } while (last
!= ready_copies
.back());
1791 update_local_interferences();
1795 bool post_scheduler::prepare_alu_group() {
1797 alu_group_tracker
&rt
= alu
.grp();
1802 sblog
<< "prepare_alu_group: starting...\n";
1806 ready
.append_from(&alu
.conflict_nodes
);
1808 // FIXME rework this loop
1812 process_ready_copies();
1816 for (node_iterator N
, I
= ready
.begin(), E
= ready
.end(); I
!= E
;
1828 unsigned cnt
= try_add_instruction(n
);
1834 sblog
<< "current group:\n";
1838 if (rt
.inst_count() == ctx
.num_slots
) {
1839 PSC_DUMP( sblog
<< " all slots used\n"; );
1844 if (!check_interferences())
1847 // don't try to add more instructions to the group with mova if this
1848 // can lead to breaking clause slot count limit - we don't want mova to
1849 // end up in the end of the new clause instead of beginning of the
1851 if (rt
.has_ar_load() && alu
.total_slots() > 121)
1854 if (rt
.inst_count() && i1
> 50)
1857 regmap
= prev_regmap
;
1862 sblog
<< " prepare_alu_group done, " << rt
.inst_count()
1865 sblog
<< "$$$$$$$$PAG i1=" << i1
1866 << " ready " << ready
.count()
1867 << " pending " << pending
.count()
1868 << " conflicting " << alu
.conflict_nodes
.count()
1873 return rt
.inst_count();
1876 void post_scheduler::release_src_values(node
* n
) {
1877 release_src_vec(n
->src
, true);
1878 release_src_vec(n
->dst
, false);
1881 void post_scheduler::release_op(node
*n
) {
1883 sblog
<< "release_op ";
1890 if (n
->is_copy_mov()) {
1891 ready_copies
.push_back(n
);
1892 } else if (n
->is_mova() || n
->is_pred_set()) {
1893 ready
.push_front(n
);
1899 void post_scheduler::release_src_val(value
*v
) {
1900 node
*d
= v
->any_def();
1907 void post_scheduler::release_src_vec(vvec
& vv
, bool src
) {
1909 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1911 if (!v
|| v
->is_readonly())
1915 release_src_val(v
->rel
);
1916 release_src_vec(v
->muse
, true);
1924 void literal_tracker::reset() {
1925 memset(lt
, 0, sizeof(lt
));
1926 memset(uc
, 0, sizeof(uc
));
// Clear all GPR read-port state before tracking a new ALU group:
// rp holds the reserved read-port entries, uc the matching use counts
// (dump() below prints them as rp[cycle][chan] : uc[cycle][chan]).
void rp_gpr_tracker::reset() {
	memset(rp, 0, sizeof(rp));
	memset(uc, 0, sizeof(uc));
}
1934 void rp_kcache_tracker::reset() {
1935 memset(rp
, 0, sizeof(rp
));
1936 memset(uc
, 0, sizeof(uc
));
1939 void alu_kcache_tracker::reset() {
1940 memset(kc
, 0, sizeof(kc
));
1944 void alu_clause_tracker::reset() {
1951 alu_clause_tracker::alu_clause_tracker(shader
&sh
)
1952 : sh(sh
), kt(sh
.get_ctx().hw_class
), slot_count(),
1956 current_ar(), current_pr(), current_idx() {}
1958 void alu_clause_tracker::emit_group() {
1960 assert(grp().inst_count());
1962 alu_group_node
*g
= grp().emit();
1964 if (grp().has_update_exec_mask()) {
1965 assert(!push_exec_mask
);
1966 push_exec_mask
= true;
1972 clause
= sh
.create_clause(NST_ALU_CLAUSE
);
1975 clause
->push_front(g
);
1977 slot_count
+= grp().slot_count();
1981 PSC_DUMP( sblog
<< " #### group emitted\n"; );
1984 void alu_clause_tracker::emit_clause(container_node
*c
) {
1987 kt
.init_clause(clause
->bc
);
1989 assert(!current_ar
);
1990 assert(!current_pr
);
1993 clause
->bc
.set_op(CF_OP_ALU_PUSH_BEFORE
);
1995 c
->push_front(clause
);
1998 push_exec_mask
= false;
2002 PSC_DUMP( sblog
<< "######### ALU clause emitted\n"; );
2005 bool alu_clause_tracker::check_clause_limits() {
2007 alu_group_tracker
>
= grp();
2009 unsigned slots
= gt
.slot_count();
2011 // reserving slots to load AR and PR values
2012 unsigned reserve_slots
= (current_ar
? 1 : 0) + (current_pr
? 1 : 0);
2013 // ...and index registers
2014 reserve_slots
+= (current_idx
[0] != NULL
) + (current_idx
[1] != NULL
);
2016 if (slot_count
+ slots
> MAX_ALU_SLOTS
- reserve_slots
)
2019 if (!kt
.try_reserve(gt
))
2025 void alu_clause_tracker::new_group() {
2030 bool alu_clause_tracker::is_empty() {
2031 return clause
== NULL
;
2034 void literal_tracker::init_group_literals(alu_group_node
* g
) {
2036 g
->literals
.clear();
2037 for (unsigned i
= 0; i
< 4; ++i
) {
2041 g
->literals
.push_back(lt
[i
]);
2044 sblog
<< "literal emitted: " << lt
[i
].f
;
2045 sblog
.print_zw_hex(lt
[i
].u
, 8);
2046 sblog
<< " " << lt
[i
].i
<< "\n";
2051 bool alu_kcache_tracker::try_reserve(alu_group_tracker
& gt
) {
2052 rp_kcache_tracker
&kt
= gt
.kcache();
2057 sb_set
<unsigned> group_lines
;
2059 unsigned nl
= kt
.get_lines(group_lines
);
2062 sb_set
<unsigned> clause_lines(lines
);
2063 lines
.add_set(group_lines
);
2065 if (clause_lines
.size() == lines
.size())
2071 lines
= clause_lines
;
2076 unsigned rp_kcache_tracker::get_lines(kc_lines
& lines
) {
2079 for (unsigned i
= 0; i
< sel_count
; ++i
) {
2080 unsigned line
= rp
[i
] & 0x1fffffffu
;
2081 unsigned index_mode
= rp
[i
] >> 29;
2087 line
= (sel_count
== 2) ? line
>> 5 : line
>> 6;
2088 line
|= index_mode
<< 29;
2090 if (lines
.insert(line
).second
)
2096 bool alu_kcache_tracker::update_kc() {
2099 bc_kcache old_kc
[4];
2100 memcpy(old_kc
, kc
, sizeof(kc
));
2102 for (kc_lines::iterator I
= lines
.begin(), E
= lines
.end(); I
!= E
; ++I
) {
2103 unsigned index_mode
= *I
>> 29;
2104 unsigned line
= *I
& 0x1fffffffu
;
2105 unsigned bank
= line
>> 8;
2107 assert(index_mode
<= KC_INDEX_INVALID
);
2110 if (c
&& (bank
== kc
[c
-1].bank
) && (kc
[c
-1].addr
+ 1 == line
) &&
2111 kc
[c
-1].index_mode
== index_mode
)
2113 kc
[c
-1].mode
= KC_LOCK_2
;
2116 memcpy(kc
, old_kc
, sizeof(kc
));
2120 kc
[c
].mode
= KC_LOCK_1
;
2124 kc
[c
].index_mode
= index_mode
;
2131 alu_node
* alu_clause_tracker::create_ar_load(value
*v
, chan_select ar_channel
) {
2132 alu_node
*a
= sh
.create_alu();
2134 if (sh
.get_ctx().uses_mova_gpr
) {
2135 a
->bc
.set_op(ALU_OP1_MOVA_GPR_INT
);
2136 a
->bc
.slot
= SLOT_TRANS
;
2138 a
->bc
.set_op(ALU_OP1_MOVA_INT
);
2139 a
->bc
.slot
= SLOT_X
;
2141 a
->bc
.dst_chan
= ar_channel
;
2142 if (ar_channel
!= SEL_X
&& sh
.get_ctx().is_cayman()) {
2143 a
->bc
.dst_gpr
= ar_channel
== SEL_Y
? CM_V_SQ_MOVA_DST_CF_IDX0
: CM_V_SQ_MOVA_DST_CF_IDX1
;
2147 a
->src
.push_back(v
);
2150 sblog
<< "created AR load: ";
// Abandon the group currently being built: all instructions placed in
// its slots are moved back into conflict_nodes, from where
// prepare_alu_group() re-appends them to the ready list on the next
// attempt.
void alu_clause_tracker::discard_current_group() {
	PSC_DUMP( sblog << "act::discard_current_group\n"; );
	grp().discard_all_slots(conflict_nodes);
}
2163 void rp_gpr_tracker::dump() {
2164 sblog
<< "=== gpr_tracker dump:\n";
2165 for (int c
= 0; c
< 3; ++c
) {
2166 sblog
<< "cycle " << c
<< " ";
2167 for (int h
= 0; h
< 4; ++h
) {
2168 sblog
<< rp
[c
][h
] << ":" << uc
[c
][h
] << " ";
2174 } // namespace r600_sb