2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
30 #define PSC_DUMP(a) do { a } while (0)
36 #include "sb_shader.h"
39 #include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1
43 rp_kcache_tracker::rp_kcache_tracker(shader
&sh
) : rp(), uc(),
44 // FIXME: for now we'll use "two const pairs" limit for r600, same as
45 // for other chips, otherwise additional check in alu_group_tracker is
46 // required to make sure that all 4 consts in the group fit into 2
50 bool rp_kcache_tracker::try_reserve(sel_chan r
) {
51 unsigned sel
= kc_sel(r
);
53 for (unsigned i
= 0; i
< sel_count
; ++i
) {
67 bool rp_kcache_tracker::try_reserve(node
* n
) {
68 bool need_unreserve
= false;
69 vvec::iterator
I(n
->src
.begin()), E(n
->src
.end());
74 if (!try_reserve(v
->select
))
77 need_unreserve
= true;
83 if (need_unreserve
&& I
!= n
->src
.begin()) {
89 } while (I
!= n
->src
.begin());
95 void rp_kcache_tracker::unreserve(node
* n
) {
96 vvec::iterator
I(n
->src
.begin()), E(n
->src
.end());
100 unreserve(v
->select
);
104 void rp_kcache_tracker::unreserve(sel_chan r
) {
105 unsigned sel
= kc_sel(r
);
107 for (unsigned i
= 0; i
< sel_count
; ++i
)
117 bool literal_tracker::try_reserve(alu_node
* n
) {
118 bool need_unreserve
= false;
120 vvec::iterator
I(n
->src
.begin()), E(n
->src
.end());
122 for (; I
!= E
; ++I
) {
124 if (v
->is_literal()) {
125 if (!try_reserve(v
->literal_value
))
128 need_unreserve
= true;
134 if (need_unreserve
&& I
!= n
->src
.begin()) {
139 unreserve(v
->literal_value
);
140 } while (I
!= n
->src
.begin());
145 void literal_tracker::unreserve(alu_node
* n
) {
146 unsigned nsrc
= n
->bc
.op_ptr
->src_count
, i
;
148 for (i
= 0; i
< nsrc
; ++i
) {
149 value
*v
= n
->src
[i
];
151 unreserve(v
->literal_value
);
155 bool literal_tracker::try_reserve(literal l
) {
157 PSC_DUMP( sblog
<< "literal reserve " << l
.u
<< " " << l
.f
<< "\n"; );
159 for (unsigned i
= 0; i
< MAX_ALU_LITERALS
; ++i
) {
163 PSC_DUMP( sblog
<< " reserved new uc = " << uc
[i
] << "\n"; );
165 } else if (lt
[i
] == l
) {
167 PSC_DUMP( sblog
<< " reserved uc = " << uc
[i
] << "\n"; );
171 PSC_DUMP( sblog
<< " failed to reserve literal\n"; );
175 void literal_tracker::unreserve(literal l
) {
177 PSC_DUMP( sblog
<< "literal unreserve " << l
.u
<< " " << l
.f
<< "\n"; );
179 for (unsigned i
= 0; i
< MAX_ALU_LITERALS
; ++i
) {
190 static inline unsigned bs_cycle_vector(unsigned bs
, unsigned src
) {
191 static const unsigned swz
[VEC_NUM
][3] = {
192 {0, 1, 2}, {0, 2, 1}, {1, 2, 0}, {1, 0, 2}, {2, 0, 1}, {2, 1, 0}
194 assert(bs
< VEC_NUM
&& src
< 3);
198 static inline unsigned bs_cycle_scalar(unsigned bs
, unsigned src
) {
199 static const unsigned swz
[SCL_NUM
][3] = {
200 {2, 1, 0}, {1, 2, 2}, {2, 1, 2}, {2, 2, 1}
203 if (bs
>= SCL_NUM
|| src
>= 3) {
204 // this prevents gcc warning "array subscript is above array bounds"
205 // AFAICS we should never hit this path
211 static inline unsigned bs_cycle(bool trans
, unsigned bs
, unsigned src
) {
212 return trans
? bs_cycle_scalar(bs
, src
) : bs_cycle_vector(bs
, src
);
216 bool rp_gpr_tracker::try_reserve(unsigned cycle
, unsigned sel
, unsigned chan
) {
218 if (rp
[cycle
][chan
] == 0) {
219 rp
[cycle
][chan
] = sel
;
222 } else if (rp
[cycle
][chan
] == sel
) {
230 void rp_gpr_tracker::unreserve(alu_node
* n
) {
231 unsigned nsrc
= n
->bc
.op_ptr
->src_count
, i
;
232 unsigned trans
= n
->bc
.slot
== SLOT_TRANS
;
233 unsigned bs
= n
->bc
.bank_swizzle
;
234 unsigned opt
= !trans
235 && n
->bc
.src
[0].sel
== n
->bc
.src
[1].sel
236 && n
->bc
.src
[0].chan
== n
->bc
.src
[1].chan
;
238 for (i
= 0; i
< nsrc
; ++i
) {
239 value
*v
= n
->src
[i
];
240 if (v
->is_readonly() || v
->is_undef())
244 unsigned cycle
= bs_cycle(trans
, bs
, i
);
245 unreserve(cycle
, n
->bc
.src
[i
].sel
, n
->bc
.src
[i
].chan
);
250 void rp_gpr_tracker::unreserve(unsigned cycle
, unsigned sel
, unsigned chan
) {
252 assert(rp
[cycle
][chan
] == sel
&& uc
[cycle
][chan
]);
253 if (--uc
[cycle
][chan
] == 0)
258 bool rp_gpr_tracker::try_reserve(alu_node
* n
) {
259 unsigned nsrc
= n
->bc
.op_ptr
->src_count
, i
;
260 unsigned trans
= n
->bc
.slot
== SLOT_TRANS
;
261 unsigned bs
= n
->bc
.bank_swizzle
;
262 unsigned opt
= !trans
&& nsrc
>= 2 &&
263 n
->src
[0] == n
->src
[1];
265 bool need_unreserve
= false;
266 unsigned const_count
= 0, min_gpr_cycle
= 3;
268 for (i
= 0; i
< nsrc
; ++i
) {
269 value
*v
= n
->src
[i
];
270 if (v
->is_readonly() || v
->is_undef()) {
272 if (trans
&& const_count
== 3)
278 unsigned cycle
= bs_cycle(trans
, bs
, i
);
280 if (trans
&& cycle
< min_gpr_cycle
)
281 min_gpr_cycle
= cycle
;
283 if (const_count
&& cycle
< const_count
&& trans
)
286 if (!try_reserve(cycle
, n
->bc
.src
[i
].sel
, n
->bc
.src
[i
].chan
))
289 need_unreserve
= true;
293 if ((i
== nsrc
) && (min_gpr_cycle
+ 1 > const_count
))
296 if (need_unreserve
&& i
--) {
298 value
*v
= n
->src
[i
];
299 if (!v
->is_readonly() && !v
->is_undef()) {
302 unreserve(bs_cycle(trans
, bs
, i
), n
->bc
.src
[i
].sel
,
310 alu_group_tracker::alu_group_tracker(shader
&sh
)
312 gpr(), lt(), slots(),
313 max_slots(sh
.get_ctx().is_cayman() ? 4 : 5),
314 has_mova(), uses_ar(), has_predset(), has_kill(),
315 updates_exec_mask(), chan_count(), interp_param(), next_id() {
317 available_slots
= sh
.get_ctx().has_trans
? 0x1F : 0x0F;
321 sel_chan
alu_group_tracker::get_value_id(value
* v
) {
322 unsigned &id
= vmap
[v
];
325 return sel_chan(id
, v
->get_final_chan());
329 void alu_group_tracker::assign_slot(unsigned slot
, alu_node
* n
) {
332 available_slots
&= ~(1 << slot
);
334 unsigned param
= n
->interp_param();
337 assert(!interp_param
|| interp_param
== param
);
338 interp_param
= param
;
343 void alu_group_tracker::discard_all_slots(container_node
&removed_nodes
) {
344 PSC_DUMP( sblog
<< "agt::discard_all_slots\n"; );
345 discard_slots(~available_slots
& ((1 << max_slots
) - 1), removed_nodes
);
348 void alu_group_tracker::discard_slots(unsigned slot_mask
,
349 container_node
&removed_nodes
) {
352 sblog
<< "discard_slots : packed_ops : "
353 << (unsigned)packed_ops
.size() << "\n";
356 for (node_vec::iterator N
, I
= packed_ops
.begin();
357 I
!= packed_ops
.end(); I
= N
) {
360 alu_packed_node
*n
= static_cast<alu_packed_node
*>(*I
);
361 unsigned pslots
= n
->get_slot_mask();
364 sblog
<< "discard_slots : packed slot_mask : " << pslots
<< "\n";
367 if (pslots
& slot_mask
) {
370 sblog
<< "discard_slots : discarding packed...\n";
373 removed_nodes
.push_back(n
);
374 slot_mask
&= ~pslots
;
375 N
= packed_ops
.erase(I
);
376 available_slots
|= pslots
;
377 for (unsigned k
= 0; k
< max_slots
; ++k
) {
378 if (pslots
& (1 << k
))
384 for (unsigned slot
= 0; slot
< max_slots
; ++slot
) {
385 unsigned slot_bit
= 1 << slot
;
387 if (slot_mask
& slot_bit
) {
388 assert(!(available_slots
& slot_bit
));
391 assert(!(slots
[slot
]->bc
.slot_flags
& AF_4SLOT
));
394 sblog
<< "discarding slot " << slot
<< " : ";
395 dump::dump_op(slots
[slot
]);
399 removed_nodes
.push_back(slots
[slot
]);
401 available_slots
|= slot_bit
;
405 alu_node
*t
= slots
[4];
406 if (t
&& (t
->bc
.slot_flags
& AF_V
)) {
407 unsigned chan
= t
->bc
.dst_chan
;
412 sblog
<< " from trans slot to free slot " << chan
<< "\n";
424 alu_group_node
* alu_group_tracker::emit() {
426 alu_group_node
*g
= sh
.create_alu_group();
428 lt
.init_group_literals(g
);
430 for (unsigned i
= 0; i
< max_slots
; ++i
) {
431 alu_node
*n
= slots
[i
];
439 bool alu_group_tracker::try_reserve(alu_node
* n
) {
440 unsigned nsrc
= n
->bc
.op_ptr
->src_count
;
441 unsigned slot
= n
->bc
.slot
;
442 bool trans
= slot
== 4;
447 unsigned flags
= n
->bc
.op_ptr
->flags
;
449 unsigned param
= n
->interp_param();
451 if (param
&& interp_param
&& interp_param
!= param
)
454 if ((flags
& AF_KILL
) && has_predset
)
456 if ((flags
& AF_ANY_PRED
) && (has_kill
|| has_predset
))
458 if ((flags
& AF_MOVA
) && (has_mova
|| uses_ar
))
461 if (n
->uses_ar() && has_mova
)
464 for (unsigned i
= 0; i
< nsrc
; ++i
) {
466 unsigned last_id
= next_id
;
468 value
*v
= n
->src
[i
];
469 if (!v
->is_any_gpr() && !v
->is_rel())
471 sel_chan vid
= get_value_id(n
->src
[i
]);
473 if (vid
> last_id
&& chan_count
[vid
.chan()] == 3) {
477 n
->bc
.src
[i
].sel
= vid
.sel();
478 n
->bc
.src
[i
].chan
= vid
.chan();
481 if (!lt
.try_reserve(n
))
484 if (!kc
.try_reserve(n
)) {
489 unsigned fbs
= n
->forced_bank_swizzle();
491 n
->bc
.bank_swizzle
= 0;
494 n
->bc
.bank_swizzle
= VEC_210
;
496 if (gpr
.try_reserve(n
)) {
497 assign_slot(slot
, n
);
502 unsigned swz_num
= trans
? SCL_NUM
: VEC_NUM
;
503 for (unsigned bs
= 0; bs
< swz_num
; ++bs
) {
504 n
->bc
.bank_swizzle
= bs
;
505 if (gpr
.try_reserve(n
)) {
506 assign_slot(slot
, n
);
515 unsigned forced_swz_slots
= 0;
516 int first_slot
= ~0, first_nf
= ~0, last_slot
= ~0;
519 for (unsigned i
= 0; i
< max_slots
; ++i
) {
520 alu_node
*a
= slots
[i
];
522 if (first_slot
== ~0)
525 save_bs
[i
] = a
->bc
.bank_swizzle
;
526 if (a
->forced_bank_swizzle()) {
527 assert(i
!= SLOT_TRANS
);
528 forced_swz_slots
|= (1 << i
);
529 a
->bc
.bank_swizzle
= VEC_210
;
530 if (!gpr
.try_reserve(a
))
531 assert(!"internal reservation error");
536 a
->bc
.bank_swizzle
= 0;
541 if (first_nf
== ~0) {
542 assign_slot(slot
, n
);
546 assert(first_slot
!= ~0 && last_slot
!= ~0);
548 // silence "array subscript is above array bounds" with gcc 4.8
553 alu_node
*a
= slots
[i
];
554 bool backtrack
= false;
559 sblog
<< " bs: trying s" << i
<< " bs:" << a
->bc
.bank_swizzle
560 << " bt:" << backtrack
<< "\n";
563 if (!backtrack
&& gpr
.try_reserve(a
)) {
565 sblog
<< " bs: reserved s" << i
<< " bs:" << a
->bc
.bank_swizzle
569 while ((++i
<= last_slot
) && !slots
[i
]);
575 bool itrans
= i
== SLOT_TRANS
;
576 unsigned max_swz
= itrans
? SCL_221
: VEC_210
;
578 if (a
->bc
.bank_swizzle
< max_swz
) {
579 ++a
->bc
.bank_swizzle
;
582 sblog
<< " bs: inc s" << i
<< " bs:" << a
->bc
.bank_swizzle
588 a
->bc
.bank_swizzle
= 0;
589 while ((--i
>= first_nf
) && !slots
[i
]);
594 sblog
<< " bs: unreserve s" << i
<< " bs:" << a
->bc
.bank_swizzle
606 if (i
== last_slot
+ 1) {
607 assign_slot(slot
, n
);
611 // reservation failed, restore previous state
614 for (unsigned i
= 0; i
< max_slots
; ++i
) {
615 alu_node
*a
= slots
[i
];
617 a
->bc
.bank_swizzle
= save_bs
[i
];
618 bool b
= gpr
.try_reserve(a
);
628 bool alu_group_tracker::try_reserve(alu_packed_node
* p
) {
629 bool need_unreserve
= false;
630 node_iterator
I(p
->begin()), E(p
->end());
632 for (; I
!= E
; ++I
) {
633 alu_node
*n
= static_cast<alu_node
*>(*I
);
637 need_unreserve
= true;
641 packed_ops
.push_back(p
);
645 if (need_unreserve
) {
647 alu_node
*n
= static_cast<alu_node
*>(*I
);
648 slots
[n
->bc
.slot
] = NULL
;
655 void alu_group_tracker::reinit() {
657 memcpy(s
, slots
, sizeof(slots
));
661 for (int i
= max_slots
- 1; i
>= 0; --i
) {
662 if (s
[i
] && !try_reserve(s
[i
])) {
663 sblog
<< "alu_group_tracker: reinit error on slot " << i
<< "\n";
664 for (unsigned i
= 0; i
< max_slots
; ++i
) {
665 sblog
<< " slot " << i
<< " : ";
671 assert(!"alu_group_tracker: reinit error");
676 void alu_group_tracker::reset(bool keep_packed
) {
680 memset(slots
, 0, sizeof(slots
));
687 updates_exec_mask
= false;
688 available_slots
= sh
.get_ctx().has_trans
? 0x1F : 0x0F;
700 void alu_group_tracker::update_flags(alu_node
* n
) {
701 unsigned flags
= n
->bc
.op_ptr
->flags
;
702 has_kill
|= (flags
& AF_KILL
);
703 has_mova
|= (flags
& AF_MOVA
);
704 has_predset
|= (flags
& AF_ANY_PRED
);
705 uses_ar
|= n
->uses_ar();
707 if (flags
& AF_ANY_PRED
) {
708 if (n
->dst
[2] != NULL
)
709 updates_exec_mask
= true;
713 int post_scheduler::run() {
714 return run_on(sh
.root
) ? 0 : 1;
717 bool post_scheduler::run_on(container_node
* n
) {
719 for (node_riterator I
= n
->rbegin(), E
= n
->rend(); I
!= E
; ++I
) {
720 if (I
->is_container()) {
721 if (I
->subtype
== NST_BB
) {
722 bb_node
* bb
= static_cast<bb_node
*>(*I
);
725 r
= run_on(static_cast<container_node
*>(*I
));
734 void post_scheduler::init_uc_val(container_node
*c
, value
*v
) {
735 node
*d
= v
->any_def();
736 if (d
&& d
->parent
== c
)
740 void post_scheduler::init_uc_vec(container_node
*c
, vvec
&vv
, bool src
) {
741 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
743 if (!v
|| v
->is_readonly())
747 init_uc_val(c
, v
->rel
);
748 init_uc_vec(c
, v
->muse
, true);
755 unsigned post_scheduler::init_ucm(container_node
*c
, node
*n
) {
756 init_uc_vec(c
, n
->src
, true);
757 init_uc_vec(c
, n
->dst
, false);
759 uc_map::iterator F
= ucm
.find(n
);
760 return F
== ucm
.end() ? 0 : F
->second
;
763 bool post_scheduler::schedule_bb(bb_node
* bb
) {
765 sblog
<< "scheduling BB " << bb
->id
<< "\n";
766 if (!pending
.empty())
767 dump::dump_op_list(&pending
);
770 assert(pending
.empty());
771 assert(bb_pending
.empty());
772 assert(ready
.empty());
774 bb_pending
.append_from(bb
);
779 while ((n
= bb_pending
.back())) {
782 sblog
<< "post_sched_bb ";
787 // May require emitting ALU ops to load index registers
788 if (n
->is_fetch_clause()) {
790 process_fetch(static_cast<container_node
*>(n
));
794 if (n
->is_alu_clause()) {
796 bool r
= process_alu(static_cast<container_node
*>(n
));
810 void post_scheduler::init_regmap() {
815 sblog
<< "init_regmap: live: ";
816 dump::dump_set(sh
, live
);
820 for (val_set::iterator I
= live
.begin(sh
), E
= live
.end(sh
); I
!= E
; ++I
) {
823 if (!v
->is_sgpr() || !v
->is_prealloc())
829 sblog
<< "init_regmap: " << r
<< " <= ";
839 static alu_node
*create_set_idx(shader
&sh
, unsigned ar_idx
) {
840 alu_node
*a
= sh
.create_alu();
842 assert(ar_idx
== V_SQ_CF_INDEX_0
|| ar_idx
== V_SQ_CF_INDEX_1
);
843 if (ar_idx
== V_SQ_CF_INDEX_0
)
844 a
->bc
.set_op(ALU_OP0_SET_CF_IDX0
);
846 a
->bc
.set_op(ALU_OP0_SET_CF_IDX1
);
848 a
->dst
.resize(1); // Dummy needed for recolor
851 sblog
<< "created IDX load: ";
859 void post_scheduler::load_index_register(value
*v
, unsigned ar_idx
)
863 if (!sh
.get_ctx().is_cayman()) {
864 // Evergreen has to first load address register, then use CF_SET_IDX0/1
865 alu_group_tracker
&rt
= alu
.grp();
866 alu_node
*set_idx
= create_set_idx(sh
, ar_idx
);
867 if (!rt
.try_reserve(set_idx
)) {
868 sblog
<< "can't emit SET_CF_IDX";
869 dump::dump_op(set_idx
);
874 if (!alu
.check_clause_limits()) {
875 // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
880 alu_group_tracker
&rt
= alu
.grp();
881 alu_node
*a
= alu
.create_ar_load(v
, ar_idx
== V_SQ_CF_INDEX_1
? SEL_Z
: SEL_Y
);
883 if (!rt
.try_reserve(a
)) {
884 sblog
<< "can't emit AR load : ";
891 if (!alu
.check_clause_limits()) {
892 // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
896 alu
.emit_clause(cur_bb
);
899 void post_scheduler::process_fetch(container_node
*c
) {
903 for (node_iterator N
, I
= c
->begin(), E
= c
->end(); I
!= E
; I
= N
) {
909 fetch_node
*f
= static_cast<fetch_node
*>(n
);
912 sblog
<< "process_tex ";
917 // TODO: If same values used can avoid reloading index register
918 if (f
->bc
.sampler_index_mode
!= V_SQ_CF_INDEX_NONE
||
919 f
->bc
.resource_index_mode
!= V_SQ_CF_INDEX_NONE
) {
920 unsigned index_mode
= f
->bc
.sampler_index_mode
!= V_SQ_CF_INDEX_NONE
?
921 f
->bc
.sampler_index_mode
: f
->bc
.resource_index_mode
;
923 // Currently require prior opt passes to use one TEX per indexed op
924 assert(f
->parent
->count() == 1);
926 value
*v
= f
->src
.back(); // Last src is index offset
929 cur_bb
->push_front(c
);
931 load_index_register(v
, index_mode
);
932 f
->src
.pop_back(); // Don't need index value any more
938 cur_bb
->push_front(c
);
941 bool post_scheduler::process_alu(container_node
*c
) {
949 live
= c
->live_after
;
951 init_globals(c
->live_after
, true);
952 init_globals(c
->live_before
, true);
956 update_local_interferences();
958 for (node_riterator N
, I
= c
->rbegin(), E
= c
->rend(); I
!= E
; I
= N
) {
963 unsigned uc
= init_ucm(c
, n
);
966 sblog
<< "process_alu uc=" << uc
<< " ";
974 pending
.push_back(n
);
975 PSC_DUMP( sblog
<< "pending\n"; );
981 return schedule_alu(c
);
984 void post_scheduler::update_local_interferences() {
987 sblog
<< "update_local_interferences : ";
988 dump::dump_set(sh
, live
);
993 for (val_set::iterator I
= live
.begin(sh
), E
= live
.end(sh
); I
!= E
; ++I
) {
995 if (v
->is_prealloc())
998 v
->interferences
.add_set(live
);
1002 void post_scheduler::update_live_src_vec(vvec
&vv
, val_set
*born
, bool src
) {
1003 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1009 if (src
&& v
->is_any_gpr()) {
1010 if (live
.add_val(v
)) {
1011 if (!v
->is_prealloc()) {
1012 if (!cleared_interf
.contains(v
)) {
1014 sblog
<< "clearing interferences for " << *v
<< "\n";
1016 v
->interferences
.clear();
1017 cleared_interf
.add_val(v
);
1023 } else if (v
->is_rel()) {
1024 if (!v
->rel
->is_any_gpr())
1025 live
.add_val(v
->rel
);
1026 update_live_src_vec(v
->muse
, born
, true);
1031 void post_scheduler::update_live_dst_vec(vvec
&vv
) {
1032 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1038 update_live_dst_vec(v
->mdef
);
1039 } else if (v
->is_any_gpr()) {
1040 if (!live
.remove_val(v
)) {
1042 sblog
<< "failed to remove ";
1044 sblog
<< " from live : ";
1045 dump::dump_set(sh
, live
);
1053 void post_scheduler::update_live(node
*n
, val_set
*born
) {
1054 update_live_dst_vec(n
->dst
);
1055 update_live_src_vec(n
->src
, born
, true);
1056 update_live_src_vec(n
->dst
, born
, false);
1059 void post_scheduler::process_group() {
1060 alu_group_tracker
&rt
= alu
.grp();
1067 sblog
<< "process_group: live_before : ";
1068 dump::dump_set(sh
, live
);
1072 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1073 alu_node
*n
= rt
.slot(s
);
1077 update_live(n
, &vals_born
);
1081 sblog
<< "process_group: live_after : ";
1082 dump::dump_set(sh
, live
);
1086 update_local_interferences();
1088 for (unsigned i
= 0; i
< 5; ++i
) {
1089 node
*n
= rt
.slot(i
);
1090 if (n
&& !n
->is_mova()) {
1091 release_src_values(n
);
1096 void post_scheduler::init_globals(val_set
&s
, bool prealloc
) {
1099 sblog
<< "init_globals: ";
1100 dump::dump_set(sh
, s
);
1104 for (val_set::iterator I
= s
.begin(sh
), E
= s
.end(sh
); I
!= E
; ++I
) {
1106 if (v
->is_sgpr() && !v
->is_global()) {
1109 if (prealloc
&& v
->is_fixed()) {
1116 void post_scheduler::emit_index_registers() {
1117 for (unsigned i
= 0; i
< 2; i
++) {
1118 if (alu
.current_idx
[i
]) {
1119 regmap
= prev_regmap
;
1120 alu
.discard_current_group();
1122 load_index_register(alu
.current_idx
[i
], KC_INDEX_0
+ i
);
1123 alu
.current_idx
[i
] = NULL
;
1128 void post_scheduler::emit_clause() {
1130 if (alu
.current_ar
) {
1136 if (!alu
.is_empty()) {
1137 alu
.emit_clause(cur_bb
);
1140 emit_index_registers();
1143 bool post_scheduler::schedule_alu(container_node
*c
) {
1145 assert(!ready
.empty() || !ready_copies
.empty());
1147 bool improving
= true;
1148 int last_pending
= pending
.count();
1150 prev_regmap
= regmap
;
1151 if (!prepare_alu_group()) {
1153 int new_pending
= pending
.count();
1154 improving
= (new_pending
< last_pending
) || (last_pending
== 0);
1155 last_pending
= new_pending
;
1157 if (alu
.current_idx
[0] || alu
.current_idx
[1]) {
1158 regmap
= prev_regmap
;
1160 init_globals(live
, false);
1165 if (alu
.current_ar
) {
1172 if (!alu
.check_clause_limits()) {
1173 regmap
= prev_regmap
;
1175 init_globals(live
, false);
1184 if (!alu
.is_empty()) {
1188 if (!ready
.empty()) {
1189 sblog
<< "##post_scheduler: unscheduled ready instructions :";
1190 dump::dump_op_list(&ready
);
1191 assert(!"unscheduled ready instructions");
1194 if (!pending
.empty()) {
1195 sblog
<< "##post_scheduler: unscheduled pending instructions :";
1196 dump::dump_op_list(&pending
);
1197 assert(!"unscheduled pending instructions");
1202 void post_scheduler::add_interferences(value
*v
, sb_bitset
&rb
, val_set
&vs
) {
1203 unsigned chan
= v
->gpr
.chan();
1205 for (val_set::iterator I
= vs
.begin(sh
), E
= vs
.end(sh
);
1208 sel_chan gpr
= vi
->get_final_gpr();
1210 if (vi
->is_any_gpr() && gpr
&& vi
!= v
&&
1211 (!v
->chunk
|| v
->chunk
!= vi
->chunk
) &&
1212 vi
->is_fixed() && gpr
.chan() == chan
) {
1214 unsigned r
= gpr
.sel();
1217 sblog
<< "\tadd_interferences: " << *vi
<< "\n";
1227 void post_scheduler::set_color_local_val(value
*v
, sel_chan color
) {
1231 sblog
<< " recolored: ";
1237 void post_scheduler::set_color_local(value
*v
, sel_chan color
) {
1239 vvec
&vv
= v
->chunk
->values
;
1240 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1242 set_color_local_val(v2
, color
);
1246 set_color_local_val(v
, color
);
1251 bool post_scheduler::recolor_local(value
*v
) {
1255 assert(v
->is_sgpr());
1256 assert(!v
->is_prealloc());
1259 unsigned chan
= v
->gpr
.chan();
1262 sblog
<< "recolor_local: ";
1264 sblog
<< " interferences: ";
1265 dump::dump_set(sh
, v
->interferences
);
1268 sblog
<< " in chunk: ";
1269 coalescer::dump_chunk(v
->chunk
);
1275 for (vvec::iterator I
= v
->chunk
->values
.begin(),
1276 E
= v
->chunk
->values
.end(); I
!= E
; ++I
) {
1279 PSC_DUMP( sblog
<< " add_interferences for " << *v2
<< " :\n"; );
1281 add_interferences(v
, rb
, v2
->interferences
);
1284 add_interferences(v
, rb
, v
->interferences
);
1288 unsigned sz
= rb
.size();
1289 sblog
<< "registers bits: " << sz
;
1290 for (unsigned r
= 0; r
< sz
; ++r
) {
1292 sblog
<< "\n " << r
<< " ";
1293 sblog
<< (rb
.get(r
) ? 1 : 0);
1297 bool no_temp_gprs
= v
->is_global();
1298 unsigned rs
, re
, pass
= no_temp_gprs
? 1 : 0;
1303 rs
= sh
.first_temp_gpr();
1307 re
= sh
.num_nontemp_gpr();
1310 for (unsigned reg
= rs
; reg
< re
; ++reg
) {
1311 if (reg
>= rb
.size() || !rb
.get(reg
)) {
1313 set_color_local(v
, sel_chan(reg
, chan
));
1320 assert(!"recolor_local failed");
1324 void post_scheduler::emit_load_ar() {
1326 regmap
= prev_regmap
;
1327 alu
.discard_current_group();
1329 alu_group_tracker
&rt
= alu
.grp();
1330 alu_node
*a
= alu
.create_ar_load(alu
.current_ar
, SEL_X
);
1332 if (!rt
.try_reserve(a
)) {
1333 sblog
<< "can't emit AR load : ";
1341 bool post_scheduler::unmap_dst_val(value
*d
) {
1343 if (d
== alu
.current_ar
) {
1348 if (d
->is_prealloc()) {
1349 sel_chan gpr
= d
->get_final_gpr();
1350 rv_map::iterator F
= regmap
.find(gpr
);
1352 if (F
!= regmap
.end())
1355 if (c
&& c
!=d
&& (!c
->chunk
|| c
->chunk
!= d
->chunk
)) {
1357 sblog
<< "dst value conflict : ";
1359 sblog
<< " regmap contains ";
1363 assert(!"scheduler error");
1372 bool post_scheduler::unmap_dst(alu_node
*n
) {
1373 value
*d
= n
->dst
.empty() ? NULL
: n
->dst
[0];
1379 if (d
&& d
->is_any_reg()) {
1382 if (alu
.current_ar
!= d
) {
1383 sblog
<< "loading wrong ar value\n";
1386 alu
.current_ar
= NULL
;
1389 } else if (d
->is_any_gpr()) {
1390 if (!unmap_dst_val(d
))
1395 for (vvec::iterator I
= d
->mdef
.begin(), E
= d
->mdef
.end();
1401 assert(d
->is_any_gpr());
1403 if (!unmap_dst_val(d
))
1410 bool post_scheduler::map_src_val(value
*v
) {
1412 if (!v
->is_prealloc())
1415 sel_chan gpr
= v
->get_final_gpr();
1416 rv_map::iterator F
= regmap
.find(gpr
);
1418 if (F
!= regmap
.end()) {
1420 if (!v
->v_equal(c
)) {
1422 sblog
<< "can't map src value ";
1424 sblog
<< ", regmap contains ";
1431 regmap
.insert(std::make_pair(gpr
, v
));
1436 bool post_scheduler::map_src_vec(vvec
&vv
, bool src
) {
1438 // Handle possible UBO indexing
1439 bool ubo_indexing
[2] = { false, false };
1440 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1445 if (v
->is_kcache()) {
1446 unsigned index_mode
= v
->select
.kcache_index_mode();
1447 if (index_mode
== KC_INDEX_0
|| index_mode
== KC_INDEX_1
) {
1448 ubo_indexing
[index_mode
- KC_INDEX_0
] = true;
1453 // idx values stored at end of src vec, see bc_parser::prepare_alu_group
1454 for (unsigned i
= 2; i
!= 0; i
--) {
1455 if (ubo_indexing
[i
-1]) {
1456 // TODO: skip adding value to kcache reservation somehow, causes
1457 // unnecessary group breaks and cache line locks
1458 value
*v
= vv
.back();
1459 if (alu
.current_idx
[i
-1] && alu
.current_idx
[i
-1] != v
) {
1461 sblog
<< "IDX" << i
-1 << " already set to " <<
1462 *alu
.current_idx
[i
-1] << ", trying to set " << *v
<< "\n";
1467 alu
.current_idx
[i
-1] = v
;
1468 PSC_DUMP(sblog
<< "IDX" << i
-1 << " set to " << *v
<< "\n";);
1473 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1478 if ((!v
->is_any_gpr() || !v
->is_fixed()) && !v
->is_rel())
1482 value
*rel
= v
->rel
;
1485 if (!rel
->is_const()) {
1486 if (!map_src_vec(v
->muse
, true))
1489 if (rel
!= alu
.current_ar
) {
1490 if (alu
.current_ar
) {
1492 sblog
<< " current_AR is " << *alu
.current_ar
1493 << " trying to use " << *rel
<< "\n";
1498 alu
.current_ar
= rel
;
1501 sblog
<< " new current_AR assigned: " << *alu
.current_ar
1508 if (!map_src_val(v
)) {
1516 bool post_scheduler::map_src(alu_node
*n
) {
1517 if (!map_src_vec(n
->dst
, false))
1520 if (!map_src_vec(n
->src
, true))
1526 void post_scheduler::dump_regmap() {
1528 sblog
<< "# REGMAP :\n";
1530 for(rv_map::iterator I
= regmap
.begin(), E
= regmap
.end(); I
!= E
; ++I
) {
1531 sblog
<< " # " << I
->first
<< " => " << *(I
->second
) << "\n";
1535 sblog
<< " current_AR: " << *alu
.current_ar
<< "\n";
1537 sblog
<< " current_PR: " << *alu
.current_pr
<< "\n";
1538 if (alu
.current_idx
[0])
1539 sblog
<< " current IDX0: " << *alu
.current_idx
[0] << "\n";
1540 if (alu
.current_idx
[1])
1541 sblog
<< " current IDX1: " << *alu
.current_idx
[1] << "\n";
1544 void post_scheduler::recolor_locals() {
1545 alu_group_tracker
&rt
= alu
.grp();
1547 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1548 alu_node
*n
= rt
.slot(s
);
1550 value
*d
= n
->dst
[0];
1551 if (d
&& d
->is_sgpr() && !d
->is_prealloc()) {
1558 // returns true if there are interferences
1559 bool post_scheduler::check_interferences() {
1561 alu_group_tracker
&rt
= alu
.grp();
1563 unsigned interf_slots
;
1565 bool discarded
= false;
1568 sblog
<< "check_interferences: before: \n";
1576 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1577 alu_node
*n
= rt
.slot(s
);
1579 if (!unmap_dst(n
)) {
1585 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1586 alu_node
*n
= rt
.slot(s
);
1589 interf_slots
|= (1 << s
);
1595 for (unsigned i
= 0; i
< 5; ++i
) {
1596 if (interf_slots
& (1 << i
)) {
1597 sblog
<< "!!!!!! interf slot: " << i
<< " : ";
1598 dump::dump_op(rt
.slot(i
));
1607 PSC_DUMP( sblog
<< "ci: discarding slots " << interf_slots
<< "\n"; );
1609 rt
.discard_slots(interf_slots
, alu
.conflict_nodes
);
1610 regmap
= prev_regmap
;
1616 sblog
<< "check_interferences: after: \n";
1623 // add instruction(s) (alu_node or contents of alu_packed_node) to current group
1624 // returns the number of added instructions on success
1625 unsigned post_scheduler::try_add_instruction(node
*n
) {
1627 alu_group_tracker
&rt
= alu
.grp();
1629 unsigned avail_slots
= rt
.avail_slots();
1631 // Cannot schedule in same clause as instructions using this index value
1632 if (!n
->dst
.empty() && n
->dst
[0] &&
1633 (n
->dst
[0] == alu
.current_idx
[0] || n
->dst
[0] == alu
.current_idx
[1])) {
1634 PSC_DUMP(sblog
<< " CF_IDX source: " << *n
->dst
[0] << "\n";);
1638 if (n
->is_alu_packed()) {
1639 alu_packed_node
*p
= static_cast<alu_packed_node
*>(n
);
1640 unsigned slots
= p
->get_slot_mask();
1641 unsigned cnt
= __builtin_popcount(slots
);
1643 if ((slots
& avail_slots
) != slots
) {
1644 PSC_DUMP( sblog
<< " no slots \n"; );
1648 p
->update_packed_items(ctx
);
1650 if (!rt
.try_reserve(p
)) {
1651 PSC_DUMP( sblog
<< " reservation failed \n"; );
1659 alu_node
*a
= static_cast<alu_node
*>(n
);
1660 value
*d
= a
->dst
.empty() ? NULL
: a
->dst
[0];
1662 if (d
&& d
->is_special_reg()) {
1663 assert((a
->bc
.op_ptr
->flags
& AF_MOVA
) || d
->is_geometry_emit());
1667 unsigned allowed_slots
= ctx
.alu_slots_mask(a
->bc
.op_ptr
);
1670 allowed_slots
&= avail_slots
;
1676 slot
= d
->get_final_chan();
1677 a
->bc
.dst_chan
= slot
;
1678 allowed_slots
&= (1 << slot
) | 0x10;
1680 if (a
->bc
.op_ptr
->flags
& AF_MOVA
) {
1681 if (a
->bc
.slot_flags
& AF_V
)
1682 allowed_slots
&= (1 << SLOT_X
);
1684 allowed_slots
&= (1 << SLOT_TRANS
);
1688 // FIXME workaround for some problems with MULADD in trans slot on r700,
1689 // (is it really needed on r600?)
1690 if ((a
->bc
.op
== ALU_OP3_MULADD
|| a
->bc
.op
== ALU_OP3_MULADD_IEEE
) &&
1692 allowed_slots
&= 0x0F;
1695 if (!allowed_slots
) {
1696 PSC_DUMP( sblog
<< " no suitable slots\n"; );
1700 slot
= __builtin_ctz(allowed_slots
);
1703 PSC_DUMP( sblog
<< "slot: " << slot
<< "\n"; );
1705 if (!rt
.try_reserve(a
)) {
1706 PSC_DUMP( sblog
<< " reservation failed\n"; );
1715 bool post_scheduler::check_copy(node
*n
) {
1716 if (!n
->is_copy_mov())
1719 value
*s
= n
->src
[0];
1720 value
*d
= n
->dst
[0];
1722 if (!s
->is_sgpr() || !d
->is_sgpr())
1725 if (!s
->is_prealloc()) {
1728 if (!s
->chunk
|| s
->chunk
!= d
->chunk
)
1732 if (s
->gpr
== d
->gpr
) {
1735 sblog
<< "check_copy: ";
1740 rv_map::iterator F
= regmap
.find(d
->gpr
);
1741 bool gpr_free
= (F
== regmap
.end());
1743 if (d
->is_prealloc()) {
1745 PSC_DUMP( sblog
<< " copy not ready...\n";);
1749 value
*rv
= F
->second
;
1750 if (rv
!= d
&& (!rv
->chunk
|| rv
->chunk
!= d
->chunk
)) {
1751 PSC_DUMP( sblog
<< " copy not ready(2)...\n";);
1755 unmap_dst(static_cast<alu_node
*>(n
));
1758 if (s
->is_prealloc() && !map_src_val(s
))
1761 update_live(n
, NULL
);
1763 release_src_values(n
);
1765 PSC_DUMP( sblog
<< " copy coalesced...\n";);
1771 void post_scheduler::dump_group(alu_group_tracker
&rt
) {
1772 for (unsigned i
= 0; i
< 5; ++i
) {
1773 node
*n
= rt
.slot(i
);
1775 sblog
<< "slot " << i
<< " : ";
1782 void post_scheduler::process_ready_copies() {
1787 last
= ready_copies
.back();
1789 for (node_iterator N
, I
= ready_copies
.begin(), E
= ready_copies
.end();
1795 if (!check_copy(n
)) {
1800 } while (last
!= ready_copies
.back());
1802 update_local_interferences();
// Builds one ALU instruction group: re-queues previously conflicting nodes,
// processes ready copies, then tries to place each ready node into a free
// slot via try_add_instruction().  Stops when all hardware slots are used,
// interference checking fails (register map is rolled back to prev_regmap),
// or heuristic limits are hit.  Returns nonzero iff the group contains at
// least one instruction.
// NOTE(review): region mangled by extraction -- many interior lines
// (loop bodies, braces, early-exit statements) are missing and original
// line numbers are fused into the text; tokens kept verbatim.
1806 bool post_scheduler::prepare_alu_group() {
1808 alu_group_tracker
&rt
= alu
.grp();
1813 sblog
<< "prepare_alu_group: starting...\n";
// Give nodes that conflicted in a previous group another chance.
1817 ready
.append_from(&alu
.conflict_nodes
);
1819 // FIXME rework this loop
1823 process_ready_copies();
1827 for (node_iterator N
, I
= ready
.begin(), E
= ready
.end(); I
!= E
;
1839 unsigned cnt
= try_add_instruction(n
);
1845 sblog
<< "current group:\n";
// Group is full once every hardware ALU slot holds an instruction.
1849 if (rt
.inst_count() == ctx
.num_slots
) {
1850 PSC_DUMP( sblog
<< " all slots used\n"; );
1855 if (!check_interferences())
1858 // don't try to add more instructions to the group with mova if this
1859 // can lead to breaking clause slot count limit - we don't want mova to
1860 // end up in the end of the new clause instead of beginning of the
1862 if (rt
.has_ar_load() && alu
.total_slots() > 121)
// NOTE(review): i1 is presumably an iteration/attempt counter capped at 50
// as a scheduling heuristic -- its declaration was lost; confirm upstream.
1865 if (rt
.inst_count() && i1
> 50)
// Roll the register map back to the snapshot taken before this attempt.
1868 regmap
= prev_regmap
;
1873 sblog
<< " prepare_alu_group done, " << rt
.inst_count()
1876 sblog
<< "$$$$$$$$PAG i1=" << i1
1877 << " ready " << ready
.count()
1878 << " pending " << pending
.count()
1879 << " conflicting " << alu
.conflict_nodes
.count()
1884 return rt
.inst_count();
1887 void post_scheduler::release_src_values(node
* n
) {
1888 release_src_vec(n
->src
, true);
1889 release_src_vec(n
->dst
, false);
// Hands a node whose dependencies are satisfied to the scheduler queues:
// copy movs go to the ready_copies list; MOVA and predicate-set ops are
// pushed to the FRONT of the ready list (so they are considered first).
// NOTE(review): region mangled by extraction; the PSC_DUMP body and the
// final else-branch (ordinary nodes) were lost -- tokens kept verbatim.
1892 void post_scheduler::release_op(node
*n
) {
1894 sblog
<< "release_op ";
1901 if (n
->is_copy_mov()) {
1902 ready_copies
.push_back(n
);
1903 } else if (n
->is_mova() || n
->is_pred_set()) {
// Prioritized: pushed to the front of the ready queue.
1904 ready
.push_front(n
);
// Releases a single source value: looks up its defining node via any_def().
// NOTE(review): the remainder of the body (use-count bookkeeping and the
// call to release_op on the definition, presumably) was lost in extraction;
// tokens kept verbatim.
1910 void post_scheduler::release_src_val(value
*v
) {
1911 node
*d
= v
->any_def();
// Releases every value in vector 'vv'.  Read-only values are skipped; each
// value's relative-addressing value (rel) and masked-use vector (muse) are
// released recursively.  'src' distinguishes source vs destination vectors.
// NOTE(review): region mangled by extraction -- the loop-body value fetch
// and the src-only release path were lost; tokens kept verbatim.
1918 void post_scheduler::release_src_vec(vvec
& vv
, bool src
) {
1920 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
// Skip null and read-only (constant/special) values.
1922 if (!v
|| v
->is_readonly())
1926 release_src_val(v
->rel
);
1927 release_src_vec(v
->muse
, true);
1935 void literal_tracker::reset() {
1936 memset(lt
, 0, sizeof(lt
));
1937 memset(uc
, 0, sizeof(uc
));
1940 void rp_gpr_tracker::reset() {
1941 memset(rp
, 0, sizeof(rp
));
1942 memset(uc
, 0, sizeof(uc
));
1945 void rp_kcache_tracker::reset() {
1946 memset(rp
, 0, sizeof(rp
));
1947 memset(uc
, 0, sizeof(uc
));
// Clears the clause-level kcache bank configuration 'kc'.
// NOTE(review): trailing line(s) lost in extraction -- likely also clears
// the 'lines' set used by try_reserve/update_kc; confirm upstream.
1950 void alu_kcache_tracker::reset() {
1951 memset(kc
, 0, sizeof(kc
));
// Resets per-clause state between emitted ALU clauses.
// NOTE(review): the entire body was lost in extraction; only the signature
// survives.  Recover from upstream before building.
1955 void alu_clause_tracker::reset() {
// Constructor: binds the shader, initializes the kcache tracker with the
// hardware class, and zero-initializes the slot counter and the current
// AR/PR/index-register state.
// NOTE(review): middle initializer-list lines (clause pointer, group
// trackers, exec-mask flag, presumably) were lost in extraction.
1962 alu_clause_tracker::alu_clause_tracker(shader
&sh
)
1963 : sh(sh
), kt(sh
.get_ctx().hw_class
), slot_count(),
1967 current_ar(), current_pr(), current_idx() {}
// Emits the current (non-empty) ALU group: materializes it as an
// alu_group_node, records whether it updates the exec mask, lazily creates
// the clause container on first use, and prepends the group to the clause
// (groups are pushed to the front -- presumably because scheduling proceeds
// bottom-up; confirm upstream).  Accumulates the clause slot count.
// NOTE(review): region mangled by extraction; braces and some interior
// lines are missing -- tokens kept verbatim.
1969 void alu_clause_tracker::emit_group() {
1971 assert(grp().inst_count());
1973 alu_group_node
*g
= grp().emit();
// A clause may push the exec mask at most once.
1975 if (grp().has_update_exec_mask()) {
1976 assert(!push_exec_mask
);
1977 push_exec_mask
= true;
// Lazily create the clause container for the first emitted group.
1983 clause
= sh
.create_clause(NST_ALU_CLAUSE
);
1986 clause
->push_front(g
);
1988 slot_count
+= grp().slot_count();
1992 PSC_DUMP( sblog
<< " #### group emitted\n"; );
// Finalizes the current ALU clause and prepends it to container 'c':
// initializes the clause bytecode kcache settings, upgrades the CF op to
// ALU_PUSH_BEFORE when the clause pushes the exec mask, and resets the
// per-clause flags.  AR and PR must already be released (asserts).
// NOTE(review): region mangled by extraction; guard conditions and braces
// around several statements were lost -- tokens kept verbatim.
1995 void alu_clause_tracker::emit_clause(container_node
*c
) {
1998 kt
.init_clause(clause
->bc
);
2000 assert(!current_ar
);
2001 assert(!current_pr
);
2004 clause
->bc
.set_op(CF_OP_ALU_PUSH_BEFORE
);
2006 c
->push_front(clause
);
2009 push_exec_mask
= false;
2013 PSC_DUMP( sblog
<< "######### ALU clause emitted\n"; );
// Checks whether the current group still fits into the clause: the group's
// slots plus slots reserved for reloading AR/PR and the two CF index
// registers must not exceed MAX_ALU_SLOTS, and the group's kcache lines
// must be reservable in the clause's kcache sets.
// NOTE(review): region mangled by extraction.  The bare ">" two lines
// below is an HTML-unescaping artifact of "&gt" -- the declaration was
// "alu_group_tracker &gt = grp();" (variable 'gt', used further down).
// Early-return statements after the failed checks were also lost.
2016 bool alu_clause_tracker::check_clause_limits() {
2018 alu_group_tracker
>
= grp();
2020 unsigned slots
= gt
.slot_count();
2022 // reserving slots to load AR and PR values
2023 unsigned reserve_slots
= (current_ar
? 1 : 0) + (current_pr
? 1 : 0);
2024 // ...and index registers
2025 reserve_slots
+= (current_idx
[0] != NULL
) + (current_idx
[1] != NULL
);
2027 if (slot_count
+ slots
> MAX_ALU_SLOTS
- reserve_slots
)
2030 if (!kt
.try_reserve(gt
))
// Starts tracking a fresh ALU group.
// NOTE(review): the body was lost in extraction; only the signature
// survives.  Recover from upstream before building.
2036 void alu_clause_tracker::new_group() {
2041 bool alu_clause_tracker::is_empty() {
2042 return clause
== NULL
;
// Copies the up-to-4 tracked literal values into the group node's literal
// list and dumps each one (float, zero-padded hex, and signed int views).
// NOTE(review): region mangled by extraction -- the per-slot validity
// check and the PSC_DUMP wrapper lines were lost; tokens kept verbatim.
2045 void literal_tracker::init_group_literals(alu_group_node
* g
) {
2047 g
->literals
.clear();
// At most 4 literal dwords per ALU group.
2048 for (unsigned i
= 0; i
< 4; ++i
) {
2052 g
->literals
.push_back(lt
[i
]);
2055 sblog
<< "literal emitted: " << lt
[i
].f
;
2056 sblog
.print_zw_hex(lt
[i
].u
, 8);
2057 sblog
<< " " << lt
[i
].i
<< "\n";
// Tries to add the kcache lines required by group 'gt' to the clause's
// line set.  Takes a snapshot of the current lines; if the merged set is
// no larger, the group needs no new lines and the reservation trivially
// succeeds.  On failure (lost in extraction: presumably when update_kc()
// cannot map the merged set onto the 4 kcache banks) the snapshot is
// restored.  Tokens below kept verbatim from the mangled extraction.
2062 bool alu_kcache_tracker::try_reserve(alu_group_tracker
& gt
) {
2063 rp_kcache_tracker
&kt
= gt
.kcache();
2068 sb_set
<unsigned> group_lines
;
2070 unsigned nl
= kt
.get_lines(group_lines
);
// Snapshot the clause's current line set before merging.
2073 sb_set
<unsigned> clause_lines(lines
);
2074 lines
.add_set(group_lines
);
// Unchanged size => group's lines were already covered by the clause.
2076 if (clause_lines
.size() == lines
.size())
// Rollback path: restore the pre-merge snapshot.
2082 lines
= clause_lines
;
// Collects the kcache lines used by the reserved constant selectors into
// 'lines', returning the number of newly inserted lines (counting code
// lost in extraction).  Each rp[] entry packs an index mode in the top 3
// bits (>> 29) and the selector in the low 29 bits; the selector is
// converted to a line number (32 consts/line on r600 with sel_count==2,
// 64 otherwise -- NOTE(review): shift direction per fragment; confirm
// against upstream) and the index mode is re-packed into the line key.
2087 unsigned rp_kcache_tracker::get_lines(kc_lines
& lines
) {
2090 for (unsigned i
= 0; i
< sel_count
; ++i
) {
// Low 29 bits: constant selector; top 3 bits: index mode.
2091 unsigned line
= rp
[i
] & 0x1fffffffu
;
2092 unsigned index_mode
= rp
[i
] >> 29;
2098 line
= (sel_count
== 2) ? line
>> 5 : line
>> 6;
2099 line
|= index_mode
<< 29;
// insert().second is true only for lines not seen before.
2101 if (lines
.insert(line
).second
)
// Maps the accumulated set of kcache lines onto the (at most 4) hardware
// kcache bank locks 'kc'.  Adjacent lines in the same bank with the same
// index mode are merged into a single KC_LOCK_2 (two-line) lock; otherwise
// a new KC_LOCK_1 entry is allocated.  On overflow (lost in extraction)
// the saved snapshot 'old_kc' is restored and the function fails.
// NOTE(review): region mangled by extraction -- the bank-count cursor 'c'
// declaration, loop increments, and early returns were lost; tokens below
// kept verbatim.
2107 bool alu_kcache_tracker::update_kc() {
// Snapshot current bank config so a failed update can be rolled back.
2110 bc_kcache old_kc
[4];
2111 memcpy(old_kc
, kc
, sizeof(kc
));
2113 for (kc_lines::iterator I
= lines
.begin(), E
= lines
.end(); I
!= E
; ++I
) {
// Unpack the key produced by rp_kcache_tracker::get_lines().
2114 unsigned index_mode
= *I
>> 29;
2115 unsigned line
= *I
& 0x1fffffffu
;
2116 unsigned bank
= line
>> 8;
2118 assert(index_mode
<= KC_INDEX_INVALID
);
// Merge with the previous lock when this line directly follows it in the
// same bank with the same index mode.
2121 if (c
&& (bank
== kc
[c
-1].bank
) && (kc
[c
-1].addr
+ 1 == line
) &&
2122 kc
[c
-1].index_mode
== index_mode
)
2124 kc
[c
-1].mode
= KC_LOCK_2
;
// Rollback path: too many banks needed, restore the snapshot.
2127 memcpy(kc
, old_kc
, sizeof(kc
));
2131 kc
[c
].mode
= KC_LOCK_1
;
2135 kc
[c
].index_mode
= index_mode
;
// Creates the MOVA instruction that loads value 'v' into the address
// register for channel 'ar_channel'.  Chips flagged uses_mova_gpr use
// MOVA_GPR_INT in the trans slot; others use MOVA_INT in slot X.  On
// Cayman, non-X channels instead target the CF index registers IDX0/IDX1
// via the destination GPR encoding.
// NOTE(review): region mangled by extraction -- the else keyword between
// the two op setups, closing braces, and the trailing return were lost;
// tokens kept verbatim.
2142 alu_node
* alu_clause_tracker::create_ar_load(value
*v
, chan_select ar_channel
) {
2143 alu_node
*a
= sh
.create_alu();
2145 if (sh
.get_ctx().uses_mova_gpr
) {
2146 a
->bc
.set_op(ALU_OP1_MOVA_GPR_INT
);
2147 a
->bc
.slot
= SLOT_TRANS
;
// (else branch -- 'else {' lost in extraction)
2149 a
->bc
.set_op(ALU_OP1_MOVA_INT
);
2150 a
->bc
.slot
= SLOT_X
;
2152 a
->bc
.dst_chan
= ar_channel
;
// Cayman: Y/Z channels load the CF index registers instead of AR.
2153 if (ar_channel
!= SEL_X
&& sh
.get_ctx().is_cayman()) {
2154 a
->bc
.dst_gpr
= ar_channel
== SEL_Y
? CM_V_SQ_MOVA_DST_CF_IDX0
: CM_V_SQ_MOVA_DST_CF_IDX1
;
2158 a
->src
.push_back(v
);
2161 sblog
<< "created AR load: ";
2169 void alu_clause_tracker::discard_current_group() {
2170 PSC_DUMP( sblog
<< "act::discard_current_group\n"; );
2171 grp().discard_all_slots(conflict_nodes
);
// Debug dump of the GPR read-port tracker: for each of the 3 read cycles
// prints the reserved register (rp) and use count (uc) of each of the 4
// chan positions.
// NOTE(review): region mangled by extraction -- trailing newline output
// and closing braces were lost, and runs of spaces inside the string
// literals may have been collapsed; tokens kept verbatim.
2174 void rp_gpr_tracker::dump() {
2175 sblog
<< "=== gpr_tracker dump:\n";
// 3 read cycles per ALU group.
2176 for (int c
= 0; c
< 3; ++c
) {
2177 sblog
<< "cycle " << c
<< " ";
// 4 chan positions per cycle.
2178 for (int h
= 0; h
< 4; ++h
) {
2179 sblog
<< rp
[c
][h
] << ":" << uc
[c
][h
] << " ";
2185 } // namespace r600_sb