2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
30 #define PSC_DUMP(a) do { a } while (0)
36 #include "sb_shader.h"
39 #include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1
43 rp_kcache_tracker::rp_kcache_tracker(shader
&sh
) : rp(), uc(),
44 // FIXME: for now we'll use "two const pairs" limit for r600, same as
45 // for other chips, otherwise additional check in alu_group_tracker is
46 // required to make sure that all 4 consts in the group fit into 2
50 bool rp_kcache_tracker::try_reserve(sel_chan r
) {
51 unsigned sel
= kc_sel(r
);
53 for (unsigned i
= 0; i
< sel_count
; ++i
) {
67 bool rp_kcache_tracker::try_reserve(node
* n
) {
68 bool need_unreserve
= false;
69 vvec::iterator
I(n
->src
.begin()), E(n
->src
.end());
74 if (!try_reserve(v
->select
))
77 need_unreserve
= true;
83 if (need_unreserve
&& I
!= n
->src
.begin()) {
89 } while (I
!= n
->src
.begin());
95 void rp_kcache_tracker::unreserve(node
* n
) {
96 vvec::iterator
I(n
->src
.begin()), E(n
->src
.end());
100 unreserve(v
->select
);
104 void rp_kcache_tracker::unreserve(sel_chan r
) {
105 unsigned sel
= kc_sel(r
);
107 for (unsigned i
= 0; i
< sel_count
; ++i
)
117 bool literal_tracker::try_reserve(alu_node
* n
) {
118 bool need_unreserve
= false;
120 vvec::iterator
I(n
->src
.begin()), E(n
->src
.end());
122 for (; I
!= E
; ++I
) {
124 if (v
->is_literal()) {
125 if (!try_reserve(v
->literal_value
))
128 need_unreserve
= true;
134 if (need_unreserve
&& I
!= n
->src
.begin()) {
139 unreserve(v
->literal_value
);
140 } while (I
!= n
->src
.begin());
145 void literal_tracker::unreserve(alu_node
* n
) {
146 unsigned nsrc
= n
->bc
.op_ptr
->src_count
, i
;
148 for (i
= 0; i
< nsrc
; ++i
) {
149 value
*v
= n
->src
[i
];
151 unreserve(v
->literal_value
);
155 bool literal_tracker::try_reserve(literal l
) {
157 PSC_DUMP( sblog
<< "literal reserve " << l
.u
<< " " << l
.f
<< "\n"; );
159 for (unsigned i
= 0; i
< MAX_ALU_LITERALS
; ++i
) {
163 PSC_DUMP( sblog
<< " reserved new uc = " << uc
[i
] << "\n"; );
165 } else if (lt
[i
] == l
) {
167 PSC_DUMP( sblog
<< " reserved uc = " << uc
[i
] << "\n"; );
171 PSC_DUMP( sblog
<< " failed to reserve literal\n"; );
175 void literal_tracker::unreserve(literal l
) {
177 PSC_DUMP( sblog
<< "literal unreserve " << l
.u
<< " " << l
.f
<< "\n"; );
179 for (unsigned i
= 0; i
< MAX_ALU_LITERALS
; ++i
) {
190 static inline unsigned bs_cycle_vector(unsigned bs
, unsigned src
) {
191 static const unsigned swz
[VEC_NUM
][3] = {
192 {0, 1, 2}, {0, 2, 1}, {1, 2, 0}, {1, 0, 2}, {2, 0, 1}, {2, 1, 0}
194 assert(bs
< VEC_NUM
&& src
< 3);
198 static inline unsigned bs_cycle_scalar(unsigned bs
, unsigned src
) {
199 static const unsigned swz
[SCL_NUM
][3] = {
200 {2, 1, 0}, {1, 2, 2}, {2, 1, 2}, {2, 2, 1}
203 if (bs
>= SCL_NUM
|| src
>= 3) {
204 // this prevents gcc warning "array subscript is above array bounds"
205 // AFAICS we should never hit this path
211 static inline unsigned bs_cycle(bool trans
, unsigned bs
, unsigned src
) {
212 return trans
? bs_cycle_scalar(bs
, src
) : bs_cycle_vector(bs
, src
);
216 bool rp_gpr_tracker::try_reserve(unsigned cycle
, unsigned sel
, unsigned chan
) {
218 if (rp
[cycle
][chan
] == 0) {
219 rp
[cycle
][chan
] = sel
;
222 } else if (rp
[cycle
][chan
] == sel
) {
230 void rp_gpr_tracker::unreserve(alu_node
* n
) {
231 unsigned nsrc
= n
->bc
.op_ptr
->src_count
, i
;
232 unsigned trans
= n
->bc
.slot
== SLOT_TRANS
;
233 unsigned bs
= n
->bc
.bank_swizzle
;
234 unsigned opt
= !trans
235 && n
->bc
.src
[0].sel
== n
->bc
.src
[1].sel
236 && n
->bc
.src
[0].chan
== n
->bc
.src
[1].chan
;
238 for (i
= 0; i
< nsrc
; ++i
) {
239 value
*v
= n
->src
[i
];
240 if (v
->is_readonly() || v
->is_undef())
244 unsigned cycle
= bs_cycle(trans
, bs
, i
);
245 unreserve(cycle
, n
->bc
.src
[i
].sel
, n
->bc
.src
[i
].chan
);
250 void rp_gpr_tracker::unreserve(unsigned cycle
, unsigned sel
, unsigned chan
) {
252 assert(rp
[cycle
][chan
] == sel
&& uc
[cycle
][chan
]);
253 if (--uc
[cycle
][chan
] == 0)
258 bool rp_gpr_tracker::try_reserve(alu_node
* n
) {
259 unsigned nsrc
= n
->bc
.op_ptr
->src_count
, i
;
260 unsigned trans
= n
->bc
.slot
== SLOT_TRANS
;
261 unsigned bs
= n
->bc
.bank_swizzle
;
262 unsigned opt
= !trans
&& nsrc
>= 2 &&
263 n
->src
[0] == n
->src
[1];
265 bool need_unreserve
= false;
266 unsigned const_count
= 0, min_gpr_cycle
= 3;
268 for (i
= 0; i
< nsrc
; ++i
) {
269 value
*v
= n
->src
[i
];
270 if (v
->is_readonly() || v
->is_undef()) {
272 if (trans
&& const_count
== 3)
278 unsigned cycle
= bs_cycle(trans
, bs
, i
);
280 if (trans
&& cycle
< min_gpr_cycle
)
281 min_gpr_cycle
= cycle
;
283 if (const_count
&& cycle
< const_count
&& trans
)
286 if (!try_reserve(cycle
, n
->bc
.src
[i
].sel
, n
->bc
.src
[i
].chan
))
289 need_unreserve
= true;
293 if ((i
== nsrc
) && (min_gpr_cycle
+ 1 > const_count
))
296 if (need_unreserve
&& i
--) {
298 value
*v
= n
->src
[i
];
299 if (!v
->is_readonly() && !v
->is_undef()) {
302 unreserve(bs_cycle(trans
, bs
, i
), n
->bc
.src
[i
].sel
,
310 alu_group_tracker::alu_group_tracker(shader
&sh
)
312 gpr(), lt(), slots(),
313 max_slots(sh
.get_ctx().is_cayman() ? 4 : 5),
314 has_mova(), uses_ar(), has_predset(), has_kill(),
315 updates_exec_mask(), chan_count(), interp_param(), next_id() {
317 available_slots
= sh
.get_ctx().has_trans
? 0x1F : 0x0F;
321 sel_chan
alu_group_tracker::get_value_id(value
* v
) {
322 unsigned &id
= vmap
[v
];
325 return sel_chan(id
, v
->get_final_chan());
329 void alu_group_tracker::assign_slot(unsigned slot
, alu_node
* n
) {
332 available_slots
&= ~(1 << slot
);
334 unsigned param
= n
->interp_param();
337 assert(!interp_param
|| interp_param
== param
);
338 interp_param
= param
;
343 void alu_group_tracker::discard_all_slots(container_node
&removed_nodes
) {
344 PSC_DUMP( sblog
<< "agt::discard_all_slots\n"; );
345 discard_slots(~available_slots
& ((1 << max_slots
) - 1), removed_nodes
);
348 void alu_group_tracker::discard_slots(unsigned slot_mask
,
349 container_node
&removed_nodes
) {
352 sblog
<< "discard_slots : packed_ops : "
353 << (unsigned)packed_ops
.size() << "\n";
356 for (node_vec::iterator N
, I
= packed_ops
.begin();
357 I
!= packed_ops
.end(); I
= N
) {
360 alu_packed_node
*n
= static_cast<alu_packed_node
*>(*I
);
361 unsigned pslots
= n
->get_slot_mask();
364 sblog
<< "discard_slots : packed slot_mask : " << pslots
<< "\n";
367 if (pslots
& slot_mask
) {
370 sblog
<< "discard_slots : discarding packed...\n";
373 removed_nodes
.push_back(n
);
374 slot_mask
&= ~pslots
;
375 N
= packed_ops
.erase(I
);
376 available_slots
|= pslots
;
377 for (unsigned k
= 0; k
< max_slots
; ++k
) {
378 if (pslots
& (1 << k
))
384 for (unsigned slot
= 0; slot
< max_slots
; ++slot
) {
385 unsigned slot_bit
= 1 << slot
;
387 if (slot_mask
& slot_bit
) {
388 assert(!(available_slots
& slot_bit
));
391 assert(!(slots
[slot
]->bc
.slot_flags
& AF_4SLOT
));
394 sblog
<< "discarding slot " << slot
<< " : ";
395 dump::dump_op(slots
[slot
]);
399 removed_nodes
.push_back(slots
[slot
]);
401 available_slots
|= slot_bit
;
405 alu_node
*t
= slots
[4];
406 if (t
&& (t
->bc
.slot_flags
& AF_V
)) {
407 unsigned chan
= t
->bc
.dst_chan
;
412 sblog
<< " from trans slot to free slot " << chan
<< "\n";
424 alu_group_node
* alu_group_tracker::emit() {
426 alu_group_node
*g
= sh
.create_alu_group();
428 lt
.init_group_literals(g
);
430 for (unsigned i
= 0; i
< max_slots
; ++i
) {
431 alu_node
*n
= slots
[i
];
439 bool alu_group_tracker::try_reserve(alu_node
* n
) {
440 unsigned nsrc
= n
->bc
.op_ptr
->src_count
;
441 unsigned slot
= n
->bc
.slot
;
442 bool trans
= slot
== 4;
447 unsigned flags
= n
->bc
.op_ptr
->flags
;
449 unsigned param
= n
->interp_param();
451 if (param
&& interp_param
&& interp_param
!= param
)
454 if ((flags
& AF_KILL
) && has_predset
)
456 if ((flags
& AF_ANY_PRED
) && (has_kill
|| has_predset
))
458 if ((flags
& AF_MOVA
) && (has_mova
|| uses_ar
))
461 if (n
->uses_ar() && has_mova
)
464 for (unsigned i
= 0; i
< nsrc
; ++i
) {
466 unsigned last_id
= next_id
;
468 value
*v
= n
->src
[i
];
469 if (!v
->is_any_gpr() && !v
->is_rel())
471 sel_chan vid
= get_value_id(n
->src
[i
]);
473 if (vid
> last_id
&& chan_count
[vid
.chan()] == 3) {
477 n
->bc
.src
[i
].sel
= vid
.sel();
478 n
->bc
.src
[i
].chan
= vid
.chan();
481 if (!lt
.try_reserve(n
))
484 if (!kc
.try_reserve(n
)) {
489 unsigned fbs
= n
->forced_bank_swizzle();
491 n
->bc
.bank_swizzle
= 0;
494 n
->bc
.bank_swizzle
= VEC_210
;
496 if (gpr
.try_reserve(n
)) {
497 assign_slot(slot
, n
);
502 unsigned swz_num
= trans
? SCL_NUM
: VEC_NUM
;
503 for (unsigned bs
= 0; bs
< swz_num
; ++bs
) {
504 n
->bc
.bank_swizzle
= bs
;
505 if (gpr
.try_reserve(n
)) {
506 assign_slot(slot
, n
);
515 unsigned forced_swz_slots
= 0;
516 int first_slot
= ~0, first_nf
= ~0, last_slot
= ~0;
519 for (unsigned i
= 0; i
< max_slots
; ++i
) {
520 alu_node
*a
= slots
[i
];
522 if (first_slot
== ~0)
525 save_bs
[i
] = a
->bc
.bank_swizzle
;
526 if (a
->forced_bank_swizzle()) {
527 assert(i
!= SLOT_TRANS
);
528 forced_swz_slots
|= (1 << i
);
529 a
->bc
.bank_swizzle
= VEC_210
;
530 if (!gpr
.try_reserve(a
))
531 assert(!"internal reservation error");
536 a
->bc
.bank_swizzle
= 0;
541 if (first_nf
== ~0) {
542 assign_slot(slot
, n
);
546 assert(first_slot
!= ~0 && last_slot
!= ~0);
548 // silence "array subscript is above array bounds" with gcc 4.8
553 alu_node
*a
= slots
[i
];
554 bool backtrack
= false;
559 sblog
<< " bs: trying s" << i
<< " bs:" << a
->bc
.bank_swizzle
560 << " bt:" << backtrack
<< "\n";
563 if (!backtrack
&& gpr
.try_reserve(a
)) {
565 sblog
<< " bs: reserved s" << i
<< " bs:" << a
->bc
.bank_swizzle
569 while ((++i
<= last_slot
) && !slots
[i
]);
575 bool itrans
= i
== SLOT_TRANS
;
576 unsigned max_swz
= itrans
? SCL_221
: VEC_210
;
578 if (a
->bc
.bank_swizzle
< max_swz
) {
579 ++a
->bc
.bank_swizzle
;
582 sblog
<< " bs: inc s" << i
<< " bs:" << a
->bc
.bank_swizzle
588 a
->bc
.bank_swizzle
= 0;
589 while ((--i
>= first_nf
) && !slots
[i
]);
594 sblog
<< " bs: unreserve s" << i
<< " bs:" << a
->bc
.bank_swizzle
606 if (i
== last_slot
+ 1) {
607 assign_slot(slot
, n
);
611 // reservation failed, restore previous state
614 for (unsigned i
= 0; i
< max_slots
; ++i
) {
615 alu_node
*a
= slots
[i
];
617 a
->bc
.bank_swizzle
= save_bs
[i
];
618 bool b
= gpr
.try_reserve(a
);
628 bool alu_group_tracker::try_reserve(alu_packed_node
* p
) {
629 bool need_unreserve
= false;
630 node_iterator
I(p
->begin()), E(p
->end());
632 for (; I
!= E
; ++I
) {
633 alu_node
*n
= static_cast<alu_node
*>(*I
);
637 need_unreserve
= true;
641 packed_ops
.push_back(p
);
645 if (need_unreserve
) {
647 alu_node
*n
= static_cast<alu_node
*>(*I
);
648 slots
[n
->bc
.slot
] = NULL
;
655 void alu_group_tracker::reinit() {
657 memcpy(s
, slots
, sizeof(slots
));
661 for (int i
= max_slots
- 1; i
>= 0; --i
) {
662 if (s
[i
] && !try_reserve(s
[i
])) {
663 sblog
<< "alu_group_tracker: reinit error on slot " << i
<< "\n";
664 for (unsigned i
= 0; i
< max_slots
; ++i
) {
665 sblog
<< " slot " << i
<< " : ";
671 assert(!"alu_group_tracker: reinit error");
676 void alu_group_tracker::reset(bool keep_packed
) {
680 memset(slots
, 0, sizeof(slots
));
687 updates_exec_mask
= false;
688 available_slots
= sh
.get_ctx().has_trans
? 0x1F : 0x0F;
700 void alu_group_tracker::update_flags(alu_node
* n
) {
701 unsigned flags
= n
->bc
.op_ptr
->flags
;
702 has_kill
|= (flags
& AF_KILL
);
703 has_mova
|= (flags
& AF_MOVA
);
704 has_predset
|= (flags
& AF_ANY_PRED
);
705 uses_ar
|= n
->uses_ar();
707 if (flags
& AF_ANY_PRED
) {
708 if (n
->dst
[2] != NULL
)
709 updates_exec_mask
= true;
713 int post_scheduler::run() {
714 return run_on(sh
.root
) ? 0 : 1;
717 bool post_scheduler::run_on(container_node
* n
) {
719 for (node_riterator I
= n
->rbegin(), E
= n
->rend(); I
!= E
; ++I
) {
720 if (I
->is_container()) {
721 if (I
->subtype
== NST_BB
) {
722 bb_node
* bb
= static_cast<bb_node
*>(*I
);
725 r
= run_on(static_cast<container_node
*>(*I
));
734 void post_scheduler::init_uc_val(container_node
*c
, value
*v
) {
735 node
*d
= v
->any_def();
736 if (d
&& d
->parent
== c
)
740 void post_scheduler::init_uc_vec(container_node
*c
, vvec
&vv
, bool src
) {
741 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
743 if (!v
|| v
->is_readonly())
747 init_uc_val(c
, v
->rel
);
748 init_uc_vec(c
, v
->muse
, true);
755 unsigned post_scheduler::init_ucm(container_node
*c
, node
*n
) {
756 init_uc_vec(c
, n
->src
, true);
757 init_uc_vec(c
, n
->dst
, false);
759 uc_map::iterator F
= ucm
.find(n
);
760 return F
== ucm
.end() ? 0 : F
->second
;
763 bool post_scheduler::schedule_bb(bb_node
* bb
) {
765 sblog
<< "scheduling BB " << bb
->id
<< "\n";
766 if (!pending
.empty())
767 dump::dump_op_list(&pending
);
770 assert(pending
.empty());
771 assert(bb_pending
.empty());
772 assert(ready
.empty());
774 bb_pending
.append_from(bb
);
779 while ((n
= bb_pending
.back())) {
782 sblog
<< "post_sched_bb ";
787 // May require emitting ALU ops to load index registers
788 if (n
->is_fetch_clause()) {
790 process_fetch(static_cast<container_node
*>(n
));
794 if (n
->is_alu_clause()) {
796 bool r
= process_alu(static_cast<container_node
*>(n
));
810 void post_scheduler::init_regmap() {
815 sblog
<< "init_regmap: live: ";
816 dump::dump_set(sh
, live
);
820 for (val_set::iterator I
= live
.begin(sh
), E
= live
.end(sh
); I
!= E
; ++I
) {
823 if (!v
->is_sgpr() || !v
->is_prealloc())
829 sblog
<< "init_regmap: " << r
<< " <= ";
839 static alu_node
*create_set_idx(shader
&sh
, unsigned ar_idx
) {
840 alu_node
*a
= sh
.create_alu();
842 assert(ar_idx
== V_SQ_CF_INDEX_0
|| ar_idx
== V_SQ_CF_INDEX_1
);
843 if (ar_idx
== V_SQ_CF_INDEX_0
)
844 a
->bc
.set_op(ALU_OP0_SET_CF_IDX0
);
846 a
->bc
.set_op(ALU_OP0_SET_CF_IDX1
);
848 a
->dst
.resize(1); // Dummy needed for recolor
851 sblog
<< "created IDX load: ";
859 void post_scheduler::load_index_register(value
*v
, unsigned ar_idx
)
863 if (!sh
.get_ctx().is_cayman()) {
864 // Evergreen has to first load address register, then use CF_SET_IDX0/1
865 alu_group_tracker
&rt
= alu
.grp();
866 alu_node
*set_idx
= create_set_idx(sh
, ar_idx
);
867 if (!rt
.try_reserve(set_idx
)) {
868 sblog
<< "can't emit SET_CF_IDX";
869 dump::dump_op(set_idx
);
874 if (!alu
.check_clause_limits()) {
875 // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
880 alu_group_tracker
&rt
= alu
.grp();
881 alu_node
*a
= alu
.create_ar_load(v
, ar_idx
== V_SQ_CF_INDEX_1
? SEL_Z
: SEL_Y
);
883 if (!rt
.try_reserve(a
)) {
884 sblog
<< "can't emit AR load : ";
891 if (!alu
.check_clause_limits()) {
892 // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
896 alu
.emit_clause(cur_bb
);
899 void post_scheduler::process_fetch(container_node
*c
) {
903 for (node_iterator N
, I
= c
->begin(), E
= c
->end(); I
!= E
; I
= N
) {
909 fetch_node
*f
= static_cast<fetch_node
*>(n
);
912 sblog
<< "process_tex ";
917 // TODO: If same values used can avoid reloading index register
918 if (f
->bc
.sampler_index_mode
!= V_SQ_CF_INDEX_NONE
||
919 f
->bc
.resource_index_mode
!= V_SQ_CF_INDEX_NONE
) {
920 unsigned index_mode
= f
->bc
.sampler_index_mode
!= V_SQ_CF_INDEX_NONE
?
921 f
->bc
.sampler_index_mode
: f
->bc
.resource_index_mode
;
923 // Currently require prior opt passes to use one TEX per indexed op
924 assert(f
->parent
->count() == 1);
926 value
*v
= f
->src
.back(); // Last src is index offset
929 cur_bb
->push_front(c
);
931 load_index_register(v
, index_mode
);
932 f
->src
.pop_back(); // Don't need index value any more
938 cur_bb
->push_front(c
);
941 bool post_scheduler::process_alu(container_node
*c
) {
949 live
= c
->live_after
;
951 init_globals(c
->live_after
, true);
952 init_globals(c
->live_before
, true);
956 update_local_interferences();
958 for (node_riterator N
, I
= c
->rbegin(), E
= c
->rend(); I
!= E
; I
= N
) {
963 unsigned uc
= init_ucm(c
, n
);
966 sblog
<< "process_alu uc=" << uc
<< " ";
974 pending
.push_back(n
);
975 PSC_DUMP( sblog
<< "pending\n"; );
981 return schedule_alu(c
);
984 void post_scheduler::update_local_interferences() {
987 sblog
<< "update_local_interferences : ";
988 dump::dump_set(sh
, live
);
993 for (val_set::iterator I
= live
.begin(sh
), E
= live
.end(sh
); I
!= E
; ++I
) {
995 if (v
->is_prealloc())
998 v
->interferences
.add_set(live
);
1002 void post_scheduler::update_live_src_vec(vvec
&vv
, val_set
*born
, bool src
) {
1003 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1009 if (src
&& v
->is_any_gpr()) {
1010 if (live
.add_val(v
)) {
1011 if (!v
->is_prealloc()) {
1012 if (!cleared_interf
.contains(v
)) {
1014 sblog
<< "clearing interferences for " << *v
<< "\n";
1016 v
->interferences
.clear();
1017 cleared_interf
.add_val(v
);
1023 } else if (v
->is_rel()) {
1024 if (!v
->rel
->is_any_gpr())
1025 live
.add_val(v
->rel
);
1026 update_live_src_vec(v
->muse
, born
, true);
1031 void post_scheduler::update_live_dst_vec(vvec
&vv
) {
1032 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1038 update_live_dst_vec(v
->mdef
);
1039 } else if (v
->is_any_gpr()) {
1040 if (!live
.remove_val(v
)) {
1042 sblog
<< "failed to remove ";
1044 sblog
<< " from live : ";
1045 dump::dump_set(sh
, live
);
1053 void post_scheduler::update_live(node
*n
, val_set
*born
) {
1054 update_live_dst_vec(n
->dst
);
1055 update_live_src_vec(n
->src
, born
, true);
1056 update_live_src_vec(n
->dst
, born
, false);
1059 void post_scheduler::process_group() {
1060 alu_group_tracker
&rt
= alu
.grp();
1067 sblog
<< "process_group: live_before : ";
1068 dump::dump_set(sh
, live
);
1072 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1073 alu_node
*n
= rt
.slot(s
);
1077 update_live(n
, &vals_born
);
1081 sblog
<< "process_group: live_after : ";
1082 dump::dump_set(sh
, live
);
1086 update_local_interferences();
1088 for (unsigned i
= 0; i
< 5; ++i
) {
1089 node
*n
= rt
.slot(i
);
1090 if (n
&& !n
->is_mova()) {
1091 release_src_values(n
);
1096 void post_scheduler::init_globals(val_set
&s
, bool prealloc
) {
1099 sblog
<< "init_globals: ";
1100 dump::dump_set(sh
, s
);
1104 for (val_set::iterator I
= s
.begin(sh
), E
= s
.end(sh
); I
!= E
; ++I
) {
1106 if (v
->is_sgpr() && !v
->is_global()) {
1109 if (prealloc
&& v
->is_fixed()) {
1116 void post_scheduler::emit_index_registers() {
1117 for (unsigned i
= 0; i
< 2; i
++) {
1118 if (alu
.current_idx
[i
]) {
1119 regmap
= prev_regmap
;
1120 alu
.discard_current_group();
1122 load_index_register(alu
.current_idx
[i
], KC_INDEX_0
+ i
);
1123 alu
.current_idx
[i
] = NULL
;
1128 void post_scheduler::emit_clause() {
1130 if (alu
.current_ar
) {
1133 if (!alu
.check_clause_limits()) {
1134 // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
1139 if (!alu
.is_empty()) {
1140 alu
.emit_clause(cur_bb
);
1143 emit_index_registers();
1146 bool post_scheduler::schedule_alu(container_node
*c
) {
1148 assert(!ready
.empty() || !ready_copies
.empty());
1150 bool improving
= true;
1151 int last_pending
= pending
.count();
1153 prev_regmap
= regmap
;
1154 if (!prepare_alu_group()) {
1156 int new_pending
= pending
.count();
1157 improving
= (new_pending
< last_pending
) || (last_pending
== 0);
1158 last_pending
= new_pending
;
1160 if (alu
.current_idx
[0] || alu
.current_idx
[1]) {
1161 regmap
= prev_regmap
;
1163 init_globals(live
, false);
1168 if (alu
.current_ar
) {
1175 if (!alu
.check_clause_limits()) {
1176 regmap
= prev_regmap
;
1178 init_globals(live
, false);
1187 if (!alu
.is_empty()) {
1191 if (!ready
.empty()) {
1192 sblog
<< "##post_scheduler: unscheduled ready instructions :";
1193 dump::dump_op_list(&ready
);
1194 assert(!"unscheduled ready instructions");
1197 if (!pending
.empty()) {
1198 sblog
<< "##post_scheduler: unscheduled pending instructions :";
1199 dump::dump_op_list(&pending
);
1200 assert(!"unscheduled pending instructions");
1205 void post_scheduler::add_interferences(value
*v
, sb_bitset
&rb
, val_set
&vs
) {
1206 unsigned chan
= v
->gpr
.chan();
1208 for (val_set::iterator I
= vs
.begin(sh
), E
= vs
.end(sh
);
1211 sel_chan gpr
= vi
->get_final_gpr();
1213 if (vi
->is_any_gpr() && gpr
&& vi
!= v
&&
1214 (!v
->chunk
|| v
->chunk
!= vi
->chunk
) &&
1215 vi
->is_fixed() && gpr
.chan() == chan
) {
1217 unsigned r
= gpr
.sel();
1220 sblog
<< "\tadd_interferences: " << *vi
<< "\n";
1230 void post_scheduler::set_color_local_val(value
*v
, sel_chan color
) {
1234 sblog
<< " recolored: ";
1240 void post_scheduler::set_color_local(value
*v
, sel_chan color
) {
1242 vvec
&vv
= v
->chunk
->values
;
1243 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1245 set_color_local_val(v2
, color
);
1249 set_color_local_val(v
, color
);
1254 bool post_scheduler::recolor_local(value
*v
) {
1258 assert(v
->is_sgpr());
1259 assert(!v
->is_prealloc());
1262 unsigned chan
= v
->gpr
.chan();
1265 sblog
<< "recolor_local: ";
1267 sblog
<< " interferences: ";
1268 dump::dump_set(sh
, v
->interferences
);
1271 sblog
<< " in chunk: ";
1272 coalescer::dump_chunk(v
->chunk
);
1278 for (vvec::iterator I
= v
->chunk
->values
.begin(),
1279 E
= v
->chunk
->values
.end(); I
!= E
; ++I
) {
1282 PSC_DUMP( sblog
<< " add_interferences for " << *v2
<< " :\n"; );
1284 add_interferences(v
, rb
, v2
->interferences
);
1287 add_interferences(v
, rb
, v
->interferences
);
1291 unsigned sz
= rb
.size();
1292 sblog
<< "registers bits: " << sz
;
1293 for (unsigned r
= 0; r
< sz
; ++r
) {
1295 sblog
<< "\n " << r
<< " ";
1296 sblog
<< (rb
.get(r
) ? 1 : 0);
1300 bool no_temp_gprs
= v
->is_global();
1301 unsigned rs
, re
, pass
= no_temp_gprs
? 1 : 0;
1306 rs
= sh
.first_temp_gpr();
1310 re
= sh
.num_nontemp_gpr();
1313 for (unsigned reg
= rs
; reg
< re
; ++reg
) {
1314 if (reg
>= rb
.size() || !rb
.get(reg
)) {
1316 set_color_local(v
, sel_chan(reg
, chan
));
1323 assert(!"recolor_local failed");
1327 void post_scheduler::emit_load_ar() {
1329 regmap
= prev_regmap
;
1330 alu
.discard_current_group();
1332 alu_group_tracker
&rt
= alu
.grp();
1333 alu_node
*a
= alu
.create_ar_load(alu
.current_ar
, SEL_X
);
1335 if (!rt
.try_reserve(a
)) {
1336 sblog
<< "can't emit AR load : ";
1344 bool post_scheduler::unmap_dst_val(value
*d
) {
1346 if (d
== alu
.current_ar
) {
1351 if (d
->is_prealloc()) {
1352 sel_chan gpr
= d
->get_final_gpr();
1353 rv_map::iterator F
= regmap
.find(gpr
);
1355 if (F
!= regmap
.end())
1358 if (c
&& c
!=d
&& (!c
->chunk
|| c
->chunk
!= d
->chunk
)) {
1360 sblog
<< "dst value conflict : ";
1362 sblog
<< " regmap contains ";
1366 assert(!"scheduler error");
1375 bool post_scheduler::unmap_dst(alu_node
*n
) {
1376 value
*d
= n
->dst
.empty() ? NULL
: n
->dst
[0];
1382 if (d
&& d
->is_any_reg()) {
1385 if (alu
.current_ar
!= d
) {
1386 sblog
<< "loading wrong ar value\n";
1389 alu
.current_ar
= NULL
;
1392 } else if (d
->is_any_gpr()) {
1393 if (!unmap_dst_val(d
))
1398 for (vvec::iterator I
= d
->mdef
.begin(), E
= d
->mdef
.end();
1404 assert(d
->is_any_gpr());
1406 if (!unmap_dst_val(d
))
1413 bool post_scheduler::map_src_val(value
*v
) {
1415 if (!v
->is_prealloc())
1418 sel_chan gpr
= v
->get_final_gpr();
1419 rv_map::iterator F
= regmap
.find(gpr
);
1421 if (F
!= regmap
.end()) {
1423 if (!v
->v_equal(c
)) {
1425 sblog
<< "can't map src value ";
1427 sblog
<< ", regmap contains ";
1434 regmap
.insert(std::make_pair(gpr
, v
));
1439 bool post_scheduler::map_src_vec(vvec
&vv
, bool src
) {
1441 // Handle possible UBO indexing
1442 bool ubo_indexing
[2] = { false, false };
1443 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1448 if (v
->is_kcache()) {
1449 unsigned index_mode
= v
->select
.kcache_index_mode();
1450 if (index_mode
== KC_INDEX_0
|| index_mode
== KC_INDEX_1
) {
1451 ubo_indexing
[index_mode
- KC_INDEX_0
] = true;
1456 // idx values stored at end of src vec, see bc_parser::prepare_alu_group
1457 for (unsigned i
= 2; i
!= 0; i
--) {
1458 if (ubo_indexing
[i
-1]) {
1459 // TODO: skip adding value to kcache reservation somehow, causes
1460 // unnecessary group breaks and cache line locks
1461 value
*v
= vv
.back();
1462 if (alu
.current_idx
[i
-1] && alu
.current_idx
[i
-1] != v
) {
1464 sblog
<< "IDX" << i
-1 << " already set to " <<
1465 *alu
.current_idx
[i
-1] << ", trying to set " << *v
<< "\n";
1470 alu
.current_idx
[i
-1] = v
;
1471 PSC_DUMP(sblog
<< "IDX" << i
-1 << " set to " << *v
<< "\n";);
1476 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1481 if ((!v
->is_any_gpr() || !v
->is_fixed()) && !v
->is_rel())
1485 value
*rel
= v
->rel
;
1488 if (!rel
->is_const()) {
1489 if (!map_src_vec(v
->muse
, true))
1492 if (rel
!= alu
.current_ar
) {
1493 if (alu
.current_ar
) {
1495 sblog
<< " current_AR is " << *alu
.current_ar
1496 << " trying to use " << *rel
<< "\n";
1501 alu
.current_ar
= rel
;
1504 sblog
<< " new current_AR assigned: " << *alu
.current_ar
1511 if (!map_src_val(v
)) {
1519 bool post_scheduler::map_src(alu_node
*n
) {
1520 if (!map_src_vec(n
->dst
, false))
1523 if (!map_src_vec(n
->src
, true))
1529 void post_scheduler::dump_regmap() {
1531 sblog
<< "# REGMAP :\n";
1533 for(rv_map::iterator I
= regmap
.begin(), E
= regmap
.end(); I
!= E
; ++I
) {
1534 sblog
<< " # " << I
->first
<< " => " << *(I
->second
) << "\n";
1538 sblog
<< " current_AR: " << *alu
.current_ar
<< "\n";
1540 sblog
<< " current_PR: " << *alu
.current_pr
<< "\n";
1541 if (alu
.current_idx
[0])
1542 sblog
<< " current IDX0: " << *alu
.current_idx
[0] << "\n";
1543 if (alu
.current_idx
[1])
1544 sblog
<< " current IDX1: " << *alu
.current_idx
[1] << "\n";
1547 void post_scheduler::recolor_locals() {
1548 alu_group_tracker
&rt
= alu
.grp();
1550 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1551 alu_node
*n
= rt
.slot(s
);
1553 value
*d
= n
->dst
[0];
1554 if (d
&& d
->is_sgpr() && !d
->is_prealloc()) {
1561 // returns true if there are interferences
1562 bool post_scheduler::check_interferences() {
1564 alu_group_tracker
&rt
= alu
.grp();
1566 unsigned interf_slots
;
1568 bool discarded
= false;
1571 sblog
<< "check_interferences: before: \n";
1579 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1580 alu_node
*n
= rt
.slot(s
);
1582 if (!unmap_dst(n
)) {
1588 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1589 alu_node
*n
= rt
.slot(s
);
1592 interf_slots
|= (1 << s
);
1598 for (unsigned i
= 0; i
< 5; ++i
) {
1599 if (interf_slots
& (1 << i
)) {
1600 sblog
<< "!!!!!! interf slot: " << i
<< " : ";
1601 dump::dump_op(rt
.slot(i
));
1610 PSC_DUMP( sblog
<< "ci: discarding slots " << interf_slots
<< "\n"; );
1612 rt
.discard_slots(interf_slots
, alu
.conflict_nodes
);
1613 regmap
= prev_regmap
;
1619 sblog
<< "check_interferences: after: \n";
1626 // add instruction(s) (alu_node or contents of alu_packed_node) to current group
1627 // returns the number of added instructions on success
1628 unsigned post_scheduler::try_add_instruction(node
*n
) {
1630 alu_group_tracker
&rt
= alu
.grp();
1632 unsigned avail_slots
= rt
.avail_slots();
1634 // Cannot schedule in same clause as instructions using this index value
1635 if (!n
->dst
.empty() && n
->dst
[0] &&
1636 (n
->dst
[0] == alu
.current_idx
[0] || n
->dst
[0] == alu
.current_idx
[1])) {
1637 PSC_DUMP(sblog
<< " CF_IDX source: " << *n
->dst
[0] << "\n";);
1641 if (n
->is_alu_packed()) {
1642 alu_packed_node
*p
= static_cast<alu_packed_node
*>(n
);
1643 unsigned slots
= p
->get_slot_mask();
1644 unsigned cnt
= __builtin_popcount(slots
);
1646 if ((slots
& avail_slots
) != slots
) {
1647 PSC_DUMP( sblog
<< " no slots \n"; );
1651 p
->update_packed_items(ctx
);
1653 if (!rt
.try_reserve(p
)) {
1654 PSC_DUMP( sblog
<< " reservation failed \n"; );
1662 alu_node
*a
= static_cast<alu_node
*>(n
);
1663 value
*d
= a
->dst
.empty() ? NULL
: a
->dst
[0];
1665 if (d
&& d
->is_special_reg()) {
1666 assert((a
->bc
.op_ptr
->flags
& AF_MOVA
) || d
->is_geometry_emit());
1670 unsigned allowed_slots
= ctx
.alu_slots_mask(a
->bc
.op_ptr
);
1673 allowed_slots
&= avail_slots
;
1679 slot
= d
->get_final_chan();
1680 a
->bc
.dst_chan
= slot
;
1681 allowed_slots
&= (1 << slot
) | 0x10;
1683 if (a
->bc
.op_ptr
->flags
& AF_MOVA
) {
1684 if (a
->bc
.slot_flags
& AF_V
)
1685 allowed_slots
&= (1 << SLOT_X
);
1687 allowed_slots
&= (1 << SLOT_TRANS
);
1691 // FIXME workaround for some problems with MULADD in trans slot on r700,
1692 // (is it really needed on r600?)
1693 if ((a
->bc
.op
== ALU_OP3_MULADD
|| a
->bc
.op
== ALU_OP3_MULADD_IEEE
) &&
1695 allowed_slots
&= 0x0F;
1698 if (!allowed_slots
) {
1699 PSC_DUMP( sblog
<< " no suitable slots\n"; );
1703 slot
= __builtin_ctz(allowed_slots
);
1706 PSC_DUMP( sblog
<< "slot: " << slot
<< "\n"; );
1708 if (!rt
.try_reserve(a
)) {
1709 PSC_DUMP( sblog
<< " reservation failed\n"; );
1718 bool post_scheduler::check_copy(node
*n
) {
1719 if (!n
->is_copy_mov())
1722 value
*s
= n
->src
[0];
1723 value
*d
= n
->dst
[0];
1725 if (!s
->is_sgpr() || !d
->is_sgpr())
1728 if (!s
->is_prealloc()) {
1731 if (!s
->chunk
|| s
->chunk
!= d
->chunk
)
1735 if (s
->gpr
== d
->gpr
) {
1738 sblog
<< "check_copy: ";
1743 rv_map::iterator F
= regmap
.find(d
->gpr
);
1744 bool gpr_free
= (F
== regmap
.end());
1746 if (d
->is_prealloc()) {
1748 PSC_DUMP( sblog
<< " copy not ready...\n";);
1752 value
*rv
= F
->second
;
1753 if (rv
!= d
&& (!rv
->chunk
|| rv
->chunk
!= d
->chunk
)) {
1754 PSC_DUMP( sblog
<< " copy not ready(2)...\n";);
1758 unmap_dst(static_cast<alu_node
*>(n
));
1761 if (s
->is_prealloc() && !map_src_val(s
))
1764 update_live(n
, NULL
);
1766 release_src_values(n
);
1768 PSC_DUMP( sblog
<< " copy coalesced...\n";);
1774 void post_scheduler::dump_group(alu_group_tracker
&rt
) {
1775 for (unsigned i
= 0; i
< 5; ++i
) {
1776 node
*n
= rt
.slot(i
);
1778 sblog
<< "slot " << i
<< " : ";
1785 void post_scheduler::process_ready_copies() {
1790 last
= ready_copies
.back();
1792 for (node_iterator N
, I
= ready_copies
.begin(), E
= ready_copies
.end();
1798 if (!check_copy(n
)) {
1803 } while (last
!= ready_copies
.back());
1805 update_local_interferences();
// Build one ALU instruction group: pull candidate nodes from the ready list
// (including nodes that previously conflicted) into the group tracker, stop
// when all slots are filled, interference checking fails, or one of the
// heuristics below cuts the attempt short. Returns nonzero iff at least one
// instruction was placed into the group.
// NOTE(review): large portions of the scheduling loop are elided in this
// extract; comments below only describe the visible lines.
1809 bool post_scheduler::prepare_alu_group() {
1811 alu_group_tracker
&rt
= alu
.grp();
1816 sblog
<< "prepare_alu_group: starting...\n";
// Nodes that conflicted with a previous group get another chance here.
1820 ready
.append_from(&alu
.conflict_nodes
);
1822 // FIXME rework this loop
1826 process_ready_copies();
1830 for (node_iterator N
, I
= ready
.begin(), E
= ready
.end(); I
!= E
;
// cnt is the number of slots the instruction occupied, 0 on failure —
// TODO(review) confirm against try_add_instruction (not in this extract).
1842 unsigned cnt
= try_add_instruction(n
);
1848 sblog
<< "current group:\n";
// All hardware slots for this chip are occupied — the group is full.
1852 if (rt
.inst_count() == ctx
.num_slots
) {
1853 PSC_DUMP( sblog
<< " all slots used\n"; );
1858 if (!check_interferences())
1861 // don't try to add more instructions to the group with mova if this
1862 // can lead to breaking clause slot count limit - we don't want mova to
1863 // end up in the end of the new clause instead of beginning of the
1865 if (rt
.has_ar_load() && alu
.total_slots() > 121)
// Give-up heuristic: stop scanning after many iterations once the group
// already holds at least one instruction.
1868 if (rt
.inst_count() && i1
> 50)
// Roll back the register map to the state saved before the failed attempt —
// presumably prev_regmap was captured in an elided line; verify.
1871 regmap
= prev_regmap
;
1876 sblog
<< " prepare_alu_group done, " << rt
.inst_count()
1879 sblog
<< "$$$$$$$$PAG i1=" << i1
1880 << " ready " << ready
.count()
1881 << " pending " << pending
.count()
1882 << " conflicting " << alu
.conflict_nodes
.count()
// Implicit unsigned -> bool conversion: true iff the group is non-empty.
1887 return rt
.inst_count();
1890 void post_scheduler::release_src_values(node
* n
) {
1891 release_src_vec(n
->src
, true);
1892 release_src_vec(n
->dst
, false);
// A node whose dependents have all been scheduled becomes available:
// route it to the appropriate ready queue. Copy MOVs go to ready_copies so
// process_ready_copies() can try to coalesce them; MOVA and predicate-set
// ops go to the FRONT of the ready list — presumably to prioritize them
// within the clause (verify).
// NOTE(review): the PSC_DUMP tail, the else-branch for ordinary ops, and
// the closing release of source values are elided in this extract.
1895 void post_scheduler::release_op(node
*n
) {
1897 sblog
<< "release_op ";
1904 if (n
->is_copy_mov()) {
1905 ready_copies
.push_back(n
);
1906 } else if (n
->is_mova() || n
->is_pred_set()) {
1907 ready
.push_front(n
);
// Release a single consumed value: look up its defining node first.
// NOTE(review): the rest of the body (what is done with the definition —
// presumably use-count bookkeeping and calling release_op when the def
// becomes schedulable) is elided in this extract.
1913 void post_scheduler::release_src_val(value
*v
) {
1914 node
*d
= v
->any_def();
// Release every value in the vector vv. 'src' distinguishes real source
// reads from destination entries that are only visited for their
// relative-addressing components.
// NOTE(review): interior lines (the dereference of the iterator into a
// local value*, the rel-vs-plain-source branch) are elided in this extract.
1921 void post_scheduler::release_src_vec(vvec
& vv
, bool src
) {
1923 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
// Skip holes and read-only (constant/special) values — nothing to release.
1925 if (!v
|| v
->is_readonly())
// Relative-addressed value: release the address register value and the
// values it may use (muse), recursively as sources.
1929 release_src_val(v
->rel
);
1930 release_src_vec(v
->muse
, true);
1938 void literal_tracker::reset() {
1939 memset(lt
, 0, sizeof(lt
));
1940 memset(uc
, 0, sizeof(uc
));
1943 void rp_gpr_tracker::reset() {
1944 memset(rp
, 0, sizeof(rp
));
1945 memset(uc
, 0, sizeof(uc
));
1948 void rp_kcache_tracker::reset() {
1949 memset(rp
, 0, sizeof(rp
));
1950 memset(uc
, 0, sizeof(uc
));
// Reset the per-clause kcache bank lock state.
// NOTE(review): the remainder of the body (presumably clearing the 'lines'
// set as well) is elided in this extract — verify against the full source.
1953 void alu_kcache_tracker::reset() {
1954 memset(kc
, 0, sizeof(kc
));
1958 void alu_clause_tracker::reset() {
// Construct the clause tracker: stash the shader, size the kcache tracker
// for the target hw class, and value-initialize the counters/state.
// NOTE(review): some member initializers between 'slot_count()' and
// 'current_ar()' are elided in this extract.
1965 alu_clause_tracker::alu_clause_tracker(shader
&sh
)
1966 : sh(sh
), kt(sh
.get_ctx().hw_class
), slot_count(),
1970 current_ar(), current_pr(), current_idx() {}
// Emit the currently prepared (non-empty) ALU group into the clause node,
// creating the clause lazily and accounting its slot usage.
// NOTE(review): several interior lines (e.g. the condition guarding clause
// creation) are elided in this extract.
1972 void alu_clause_tracker::emit_group() {
1974 assert(grp().inst_count());
1976 alu_group_node
*g
= grp().emit();
// A group that updates the exec mask forces an ALU_PUSH_BEFORE clause op
// (see emit_clause); only one such group per clause is expected.
1978 if (grp().has_update_exec_mask()) {
1979 assert(!push_exec_mask
);
1980 push_exec_mask
= true;
1986 clause
= sh
.create_clause(NST_ALU_CLAUSE
);
// Groups are prepended — presumably because scheduling proceeds backwards;
// verify against the surrounding scheduler.
1989 clause
->push_front(g
);
1991 slot_count
+= grp().slot_count();
1995 PSC_DUMP( sblog
<< " #### group emitted\n"; );
// Finalize the pending clause and insert it at the front of container c:
// write the reserved kcache sets into the clause bytecode, select
// ALU_PUSH_BEFORE when a group in the clause updates the exec mask, and
// reset the per-clause state.
// NOTE(review): interior lines (the guard around set_op, the clearing of
// 'clause'/'slot_count') are elided in this extract.
1998 void alu_clause_tracker::emit_clause(container_node
*c
) {
2001 kt
.init_clause(clause
->bc
);
// AR/PR loads must have been emitted/consumed before the clause is closed.
2003 assert(!current_ar
);
2004 assert(!current_pr
);
2007 clause
->bc
.set_op(CF_OP_ALU_PUSH_BEFORE
);
2009 c
->push_front(clause
);
2012 push_exec_mask
= false;
2016 PSC_DUMP( sblog
<< "######### ALU clause emitted\n"; );
// Check whether the current group still fits into the clause: total slot
// count (with slots reserved to reload AR/PR and the two CF index
// registers) must stay under MAX_ALU_SLOTS, and the group's kcache lines
// must be reservable in the clause's kcache sets.
// NOTE(review): the line below reading '>' appears to be an HTML-entity
// mangling of the identifier 'gt' ('alu_group_tracker &gt = grp();');
// the failure-return lines are elided in this extract.
2019 bool alu_clause_tracker::check_clause_limits() {
2021 alu_group_tracker
>
= grp();
2023 unsigned slots
= gt
.slot_count();
2025 // reserving slots to load AR and PR values
2026 unsigned reserve_slots
= (current_ar
? 1 : 0) + (current_pr
? 1 : 0);
2027 // ...and index registers
2028 reserve_slots
+= (current_idx
[0] != NULL
) + (current_idx
[1] != NULL
);
2030 if (slot_count
+ slots
> MAX_ALU_SLOTS
- reserve_slots
)
2033 if (!kt
.try_reserve(gt
))
2039 void alu_clause_tracker::new_group() {
2044 bool alu_clause_tracker::is_empty() {
2045 return clause
== NULL
;
// Copy the (up to 4) literals tracked for the current group into the group
// node's literal list, replacing whatever was there.
// NOTE(review): interior lines (the early-break for unused literal slots
// and the PSC_DUMP guards) are elided in this extract.
2048 void literal_tracker::init_group_literals(alu_group_node
* g
) {
2050 g
->literals
.clear();
2051 for (unsigned i
= 0; i
< 4; ++i
) {
2055 g
->literals
.push_back(lt
[i
]);
// Debug dump prints each literal three ways: float, zero-padded hex, int.
2058 sblog
<< "literal emitted: " << lt
[i
].f
;
2059 sblog
.print_zw_hex(lt
[i
].u
, 8);
2060 sblog
<< " " << lt
[i
].i
<< "\n";
// Try to reserve the kcache lines required by the group in the clause-wide
// line set. If merging the group's lines adds nothing new, the group is
// already covered and the reservation trivially succeeds; otherwise the
// updated set must still map onto the hw kcache banks (update_kc, elided),
// and on failure the previous line set is restored.
// NOTE(review): the success/failure return statements and the update_kc
// call are elided in this extract.
2065 bool alu_kcache_tracker::try_reserve(alu_group_tracker
& gt
) {
2066 rp_kcache_tracker
&kt
= gt
.kcache();
2071 sb_set
<unsigned> group_lines
;
2073 unsigned nl
= kt
.get_lines(group_lines
);
// Snapshot the clause's current lines so we can roll back on failure.
2076 sb_set
<unsigned> clause_lines(lines
);
2077 lines
.add_set(group_lines
);
// Same size after the union => no new lines were needed.
2079 if (clause_lines
.size() == lines
.size())
// Rollback path: restore the pre-merge line set.
2085 lines
= clause_lines
;
// Decode the reserved const pairs into kcache line identifiers and insert
// them into 'lines'. Each rp[i] entry packs the selector in the low 29 bits
// and the index mode in the top 3 bits; the same packing is reapplied to
// the computed line (index_mode << 29). Returns the number of lines newly
// added — TODO(review) confirm, the counting/return lines are elided.
2090 unsigned rp_kcache_tracker::get_lines(kc_lines
& lines
) {
2093 for (unsigned i
= 0; i
< sel_count
; ++i
) {
// Low 29 bits: const selector; top 3 bits: index mode.
2094 unsigned line
= rp
[i
] & 0x1fffffffu
;
2095 unsigned index_mode
= rp
[i
] >> 29;
// Line granularity differs with sel_count (2 vs more): shift by 5 or 6 —
// presumably the per-chip line size; verify against the ISA docs.
2101 line
= (sel_count
== 2) ? line
>> 5 : line
>> 6;
2102 line
|= index_mode
<< 29;
// set::insert().second is true only for lines not seen before.
2104 if (lines
.insert(line
).second
)
// Rebuild the hw kcache bank lock entries (kc[4]) from the accumulated line
// set. Consecutive lines in the same bank with the same index mode are
// merged by widening the previous entry to a 2-line lock; otherwise a new
// 1-line entry is started. On overflow (more entries than the hw provides,
// check elided) the previous kc state is restored and the function fails.
// NOTE(review): the entry counter 'c' management, the bank/addr assignment
// lines and the return statements are elided in this extract.
2110 bool alu_kcache_tracker::update_kc() {
// Snapshot for rollback if the lines don't fit the available entries.
2113 bc_kcache old_kc
[4];
2114 memcpy(old_kc
, kc
, sizeof(kc
));
2116 for (kc_lines::iterator I
= lines
.begin(), E
= lines
.end(); I
!= E
; ++I
) {
// Unpack the packing produced by rp_kcache_tracker::get_lines.
2117 unsigned index_mode
= *I
>> 29;
2118 unsigned line
= *I
& 0x1fffffffu
;
2119 unsigned bank
= line
>> 8;
2121 assert(index_mode
<= KC_INDEX_INVALID
);
// Adjacent line in the same bank with the same index mode: widen the
// previous entry to a two-line lock instead of consuming a new entry.
2124 if (c
&& (bank
== kc
[c
-1].bank
) && (kc
[c
-1].addr
+ 1 == line
) &&
2125 kc
[c
-1].index_mode
== index_mode
)
2127 kc
[c
-1].mode
= KC_LOCK_2
;
// Failure path: restore the snapshot taken above.
2130 memcpy(kc
, old_kc
, sizeof(kc
));
// Start a fresh single-line lock entry.
2134 kc
[c
].mode
= KC_LOCK_1
;
2138 kc
[c
].index_mode
= index_mode
;
// Build a MOVA instruction that loads value v into the address register
// (or, on Cayman for non-X channels, into the CF index registers IDX0/IDX1).
// Chips with uses_mova_gpr take MOVA_GPR_INT in the trans slot; others take
// MOVA_INT in slot X.
// NOTE(review): the else-keyword lines, dst_gpr setup for the non-Cayman
// case and the return statement are elided in this extract.
2145 alu_node
* alu_clause_tracker::create_ar_load(value
*v
, chan_select ar_channel
) {
2146 alu_node
*a
= sh
.create_alu();
2148 if (sh
.get_ctx().uses_mova_gpr
) {
2149 a
->bc
.set_op(ALU_OP1_MOVA_GPR_INT
);
2150 a
->bc
.slot
= SLOT_TRANS
;
2152 a
->bc
.set_op(ALU_OP1_MOVA_INT
);
2153 a
->bc
.slot
= SLOT_X
;
2155 a
->bc
.dst_chan
= ar_channel
;
// Cayman: Y selects CF_IDX0, otherwise CF_IDX1, as the MOVA destination.
2156 if (ar_channel
!= SEL_X
&& sh
.get_ctx().is_cayman()) {
2157 a
->bc
.dst_gpr
= ar_channel
== SEL_Y
? CM_V_SQ_MOVA_DST_CF_IDX0
: CM_V_SQ_MOVA_DST_CF_IDX1
;
2161 a
->src
.push_back(v
);
2164 sblog
<< "created AR load: ";
2172 void alu_clause_tracker::discard_current_group() {
2173 PSC_DUMP( sblog
<< "act::discard_current_group\n"; );
2174 grp().discard_all_slots(conflict_nodes
);
// Debug dump of the GPR read-port tracker: for each of the 3 read cycles
// and 4 channels, print the reservation (rp) and its use count (uc) as
// "rp:uc" pairs.
// NOTE(review): the trailing newline output and loop-closing lines are
// elided in this extract.
2177 void rp_gpr_tracker::dump() {
2178 sblog
<< "=== gpr_tracker dump:\n";
2179 for (int c
= 0; c
< 3; ++c
) {
2180 sblog
<< "cycle " << c
<< " ";
2181 for (int h
= 0; h
< 4; ++h
) {
2182 sblog
<< rp
[c
][h
] << ":" << uc
[c
][h
] << " ";
2188 } // namespace r600_sb