2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
30 #define PSC_DUMP(a) do { a } while (0)
36 #include "sb_shader.h"
39 #include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1
43 rp_kcache_tracker::rp_kcache_tracker(shader
&sh
) : rp(), uc(),
44 // FIXME: for now we'll use "two const pairs" limit for r600, same as
45 // for other chips, otherwise additional check in alu_group_tracker is
46 // required to make sure that all 4 consts in the group fit into 2
50 bool rp_kcache_tracker::try_reserve(sel_chan r
) {
51 unsigned sel
= kc_sel(r
);
53 for (unsigned i
= 0; i
< sel_count
; ++i
) {
67 bool rp_kcache_tracker::try_reserve(node
* n
) {
68 bool need_unreserve
= false;
69 vvec::iterator
I(n
->src
.begin()), E(n
->src
.end());
74 if (!try_reserve(v
->select
))
77 need_unreserve
= true;
83 if (need_unreserve
&& I
!= n
->src
.begin()) {
89 } while (I
!= n
->src
.begin());
95 void rp_kcache_tracker::unreserve(node
* n
) {
96 vvec::iterator
I(n
->src
.begin()), E(n
->src
.end());
100 unreserve(v
->select
);
104 void rp_kcache_tracker::unreserve(sel_chan r
) {
105 unsigned sel
= kc_sel(r
);
107 for (unsigned i
= 0; i
< sel_count
; ++i
)
117 bool literal_tracker::try_reserve(alu_node
* n
) {
118 bool need_unreserve
= false;
120 vvec::iterator
I(n
->src
.begin()), E(n
->src
.end());
122 for (; I
!= E
; ++I
) {
124 if (v
->is_literal()) {
125 if (!try_reserve(v
->literal_value
))
128 need_unreserve
= true;
134 if (need_unreserve
&& I
!= n
->src
.begin()) {
139 unreserve(v
->literal_value
);
140 } while (I
!= n
->src
.begin());
145 void literal_tracker::unreserve(alu_node
* n
) {
146 unsigned nsrc
= n
->bc
.op_ptr
->src_count
, i
;
148 for (i
= 0; i
< nsrc
; ++i
) {
149 value
*v
= n
->src
[i
];
151 unreserve(v
->literal_value
);
155 bool literal_tracker::try_reserve(literal l
) {
157 PSC_DUMP( sblog
<< "literal reserve " << l
.u
<< " " << l
.f
<< "\n"; );
159 for (unsigned i
= 0; i
< MAX_ALU_LITERALS
; ++i
) {
163 PSC_DUMP( sblog
<< " reserved new uc = " << uc
[i
] << "\n"; );
165 } else if (lt
[i
] == l
) {
167 PSC_DUMP( sblog
<< " reserved uc = " << uc
[i
] << "\n"; );
171 PSC_DUMP( sblog
<< " failed to reserve literal\n"; );
175 void literal_tracker::unreserve(literal l
) {
177 PSC_DUMP( sblog
<< "literal unreserve " << l
.u
<< " " << l
.f
<< "\n"; );
179 for (unsigned i
= 0; i
< MAX_ALU_LITERALS
; ++i
) {
190 static inline unsigned bs_cycle_vector(unsigned bs
, unsigned src
) {
191 static const unsigned swz
[VEC_NUM
][3] = {
192 {0, 1, 2}, {0, 2, 1}, {1, 2, 0}, {1, 0, 2}, {2, 0, 1}, {2, 1, 0}
194 assert(bs
< VEC_NUM
&& src
< 3);
198 static inline unsigned bs_cycle_scalar(unsigned bs
, unsigned src
) {
199 static const unsigned swz
[SCL_NUM
][3] = {
200 {2, 1, 0}, {1, 2, 2}, {2, 1, 2}, {2, 2, 1}
203 if (bs
>= SCL_NUM
|| src
>= 3) {
204 // this prevents gcc warning "array subscript is above array bounds"
205 // AFAICS we should never hit this path
211 static inline unsigned bs_cycle(bool trans
, unsigned bs
, unsigned src
) {
212 return trans
? bs_cycle_scalar(bs
, src
) : bs_cycle_vector(bs
, src
);
216 bool rp_gpr_tracker::try_reserve(unsigned cycle
, unsigned sel
, unsigned chan
) {
218 if (rp
[cycle
][chan
] == 0) {
219 rp
[cycle
][chan
] = sel
;
222 } else if (rp
[cycle
][chan
] == sel
) {
230 void rp_gpr_tracker::unreserve(alu_node
* n
) {
231 unsigned nsrc
= n
->bc
.op_ptr
->src_count
, i
;
232 unsigned trans
= n
->bc
.slot
== SLOT_TRANS
;
233 unsigned bs
= n
->bc
.bank_swizzle
;
234 unsigned opt
= !trans
235 && n
->bc
.src
[0].sel
== n
->bc
.src
[1].sel
236 && n
->bc
.src
[0].chan
== n
->bc
.src
[1].chan
;
238 for (i
= 0; i
< nsrc
; ++i
) {
239 value
*v
= n
->src
[i
];
240 if (v
->is_readonly() || v
->is_undef())
244 unsigned cycle
= bs_cycle(trans
, bs
, i
);
245 unreserve(cycle
, n
->bc
.src
[i
].sel
, n
->bc
.src
[i
].chan
);
250 void rp_gpr_tracker::unreserve(unsigned cycle
, unsigned sel
, unsigned chan
) {
252 assert(rp
[cycle
][chan
] == sel
&& uc
[cycle
][chan
]);
253 if (--uc
[cycle
][chan
] == 0)
258 bool rp_gpr_tracker::try_reserve(alu_node
* n
) {
259 unsigned nsrc
= n
->bc
.op_ptr
->src_count
, i
;
260 unsigned trans
= n
->bc
.slot
== SLOT_TRANS
;
261 unsigned bs
= n
->bc
.bank_swizzle
;
262 unsigned opt
= !trans
&& nsrc
>= 2 &&
263 n
->src
[0] == n
->src
[1];
265 bool need_unreserve
= false;
266 unsigned const_count
= 0, min_gpr_cycle
= 3;
268 for (i
= 0; i
< nsrc
; ++i
) {
269 value
*v
= n
->src
[i
];
270 if (v
->is_readonly() || v
->is_undef()) {
272 if (trans
&& const_count
== 3)
278 unsigned cycle
= bs_cycle(trans
, bs
, i
);
280 if (trans
&& cycle
< min_gpr_cycle
)
281 min_gpr_cycle
= cycle
;
283 if (const_count
&& cycle
< const_count
&& trans
)
286 if (!try_reserve(cycle
, n
->bc
.src
[i
].sel
, n
->bc
.src
[i
].chan
))
289 need_unreserve
= true;
293 if ((i
== nsrc
) && (min_gpr_cycle
+ 1 > const_count
))
296 if (need_unreserve
&& i
--) {
298 value
*v
= n
->src
[i
];
299 if (!v
->is_readonly() && !v
->is_undef()) {
302 unreserve(bs_cycle(trans
, bs
, i
), n
->bc
.src
[i
].sel
,
310 alu_group_tracker::alu_group_tracker(shader
&sh
)
312 gpr(), lt(), slots(),
313 max_slots(sh
.get_ctx().is_cayman() ? 4 : 5),
314 has_mova(), uses_ar(), has_predset(), has_kill(),
315 updates_exec_mask(), consumes_lds_oqa(), produces_lds_oqa(), chan_count(), interp_param(), next_id() {
317 available_slots
= sh
.get_ctx().has_trans
? 0x1F : 0x0F;
321 sel_chan
alu_group_tracker::get_value_id(value
* v
) {
322 unsigned &id
= vmap
[v
];
325 return sel_chan(id
, v
->get_final_chan());
329 void alu_group_tracker::assign_slot(unsigned slot
, alu_node
* n
) {
332 available_slots
&= ~(1 << slot
);
334 unsigned param
= n
->interp_param();
337 assert(!interp_param
|| interp_param
== param
);
338 interp_param
= param
;
343 void alu_group_tracker::discard_all_slots(container_node
&removed_nodes
) {
344 PSC_DUMP( sblog
<< "agt::discard_all_slots\n"; );
345 discard_slots(~available_slots
& ((1 << max_slots
) - 1), removed_nodes
);
348 void alu_group_tracker::discard_slots(unsigned slot_mask
,
349 container_node
&removed_nodes
) {
352 sblog
<< "discard_slots : packed_ops : "
353 << (unsigned)packed_ops
.size() << "\n";
356 for (node_vec::iterator N
, I
= packed_ops
.begin();
357 I
!= packed_ops
.end(); I
= N
) {
360 alu_packed_node
*n
= static_cast<alu_packed_node
*>(*I
);
361 unsigned pslots
= n
->get_slot_mask();
364 sblog
<< "discard_slots : packed slot_mask : " << pslots
<< "\n";
367 if (pslots
& slot_mask
) {
370 sblog
<< "discard_slots : discarding packed...\n";
373 removed_nodes
.push_back(n
);
374 slot_mask
&= ~pslots
;
375 N
= packed_ops
.erase(I
);
376 available_slots
|= pslots
;
377 for (unsigned k
= 0; k
< max_slots
; ++k
) {
378 if (pslots
& (1 << k
))
384 for (unsigned slot
= 0; slot
< max_slots
; ++slot
) {
385 unsigned slot_bit
= 1 << slot
;
387 if (slot_mask
& slot_bit
) {
388 assert(!(available_slots
& slot_bit
));
391 assert(!(slots
[slot
]->bc
.slot_flags
& AF_4SLOT
));
394 sblog
<< "discarding slot " << slot
<< " : ";
395 dump::dump_op(slots
[slot
]);
399 removed_nodes
.push_back(slots
[slot
]);
401 available_slots
|= slot_bit
;
405 alu_node
*t
= slots
[4];
406 if (t
&& (t
->bc
.slot_flags
& AF_V
)) {
407 unsigned chan
= t
->bc
.dst_chan
;
412 sblog
<< " from trans slot to free slot " << chan
<< "\n";
424 alu_group_node
* alu_group_tracker::emit() {
426 alu_group_node
*g
= sh
.create_alu_group();
428 lt
.init_group_literals(g
);
430 for (unsigned i
= 0; i
< max_slots
; ++i
) {
431 alu_node
*n
= slots
[i
];
439 bool alu_group_tracker::try_reserve(alu_node
* n
) {
440 unsigned nsrc
= n
->bc
.op_ptr
->src_count
;
441 unsigned slot
= n
->bc
.slot
;
442 bool trans
= slot
== 4;
447 unsigned flags
= n
->bc
.op_ptr
->flags
;
449 unsigned param
= n
->interp_param();
451 if (param
&& interp_param
&& interp_param
!= param
)
454 if ((flags
& AF_KILL
) && has_predset
)
456 if ((flags
& AF_ANY_PRED
) && (has_kill
|| has_predset
))
458 if ((flags
& AF_MOVA
) && (has_mova
|| uses_ar
))
461 if (n
->uses_ar() && has_mova
)
464 if (consumes_lds_oqa
)
466 if (n
->consumes_lds_oq() && available_slots
!= (sh
.get_ctx().has_trans
? 0x1F : 0x0F))
468 for (unsigned i
= 0; i
< nsrc
; ++i
) {
470 unsigned last_id
= next_id
;
472 value
*v
= n
->src
[i
];
473 if (!v
->is_any_gpr() && !v
->is_rel())
475 sel_chan vid
= get_value_id(n
->src
[i
]);
477 if (vid
> last_id
&& chan_count
[vid
.chan()] == 3) {
481 n
->bc
.src
[i
].sel
= vid
.sel();
482 n
->bc
.src
[i
].chan
= vid
.chan();
485 if (!lt
.try_reserve(n
))
488 if (!kc
.try_reserve(n
)) {
493 unsigned fbs
= n
->forced_bank_swizzle();
495 n
->bc
.bank_swizzle
= 0;
498 n
->bc
.bank_swizzle
= VEC_210
;
500 if (gpr
.try_reserve(n
)) {
501 assign_slot(slot
, n
);
506 unsigned swz_num
= trans
? SCL_NUM
: VEC_NUM
;
507 for (unsigned bs
= 0; bs
< swz_num
; ++bs
) {
508 n
->bc
.bank_swizzle
= bs
;
509 if (gpr
.try_reserve(n
)) {
510 assign_slot(slot
, n
);
519 unsigned forced_swz_slots
= 0;
520 int first_slot
= ~0, first_nf
= ~0, last_slot
= ~0;
523 for (unsigned i
= 0; i
< max_slots
; ++i
) {
524 alu_node
*a
= slots
[i
];
526 if (first_slot
== ~0)
529 save_bs
[i
] = a
->bc
.bank_swizzle
;
530 if (a
->forced_bank_swizzle()) {
531 assert(i
!= SLOT_TRANS
);
532 forced_swz_slots
|= (1 << i
);
533 a
->bc
.bank_swizzle
= VEC_210
;
534 if (!gpr
.try_reserve(a
))
535 assert(!"internal reservation error");
540 a
->bc
.bank_swizzle
= 0;
545 if (first_nf
== ~0) {
546 assign_slot(slot
, n
);
550 assert(first_slot
!= ~0 && last_slot
!= ~0);
552 // silence "array subscript is above array bounds" with gcc 4.8
557 alu_node
*a
= slots
[i
];
558 bool backtrack
= false;
563 sblog
<< " bs: trying s" << i
<< " bs:" << a
->bc
.bank_swizzle
564 << " bt:" << backtrack
<< "\n";
567 if (!backtrack
&& gpr
.try_reserve(a
)) {
569 sblog
<< " bs: reserved s" << i
<< " bs:" << a
->bc
.bank_swizzle
573 while ((++i
<= last_slot
) && !slots
[i
]);
579 bool itrans
= i
== SLOT_TRANS
;
580 unsigned max_swz
= itrans
? SCL_221
: VEC_210
;
582 if (a
->bc
.bank_swizzle
< max_swz
) {
583 ++a
->bc
.bank_swizzle
;
586 sblog
<< " bs: inc s" << i
<< " bs:" << a
->bc
.bank_swizzle
592 a
->bc
.bank_swizzle
= 0;
593 while ((--i
>= first_nf
) && !slots
[i
]);
598 sblog
<< " bs: unreserve s" << i
<< " bs:" << a
->bc
.bank_swizzle
610 if (i
== last_slot
+ 1) {
611 assign_slot(slot
, n
);
615 // reservation failed, restore previous state
618 for (unsigned i
= 0; i
< max_slots
; ++i
) {
619 alu_node
*a
= slots
[i
];
621 a
->bc
.bank_swizzle
= save_bs
[i
];
622 bool b
= gpr
.try_reserve(a
);
632 bool alu_group_tracker::try_reserve(alu_packed_node
* p
) {
633 bool need_unreserve
= false;
634 node_iterator
I(p
->begin()), E(p
->end());
636 for (; I
!= E
; ++I
) {
637 alu_node
*n
= static_cast<alu_node
*>(*I
);
641 need_unreserve
= true;
645 packed_ops
.push_back(p
);
649 if (need_unreserve
) {
651 alu_node
*n
= static_cast<alu_node
*>(*I
);
652 slots
[n
->bc
.slot
] = NULL
;
659 void alu_group_tracker::reinit() {
661 memcpy(s
, slots
, sizeof(slots
));
665 for (int i
= max_slots
- 1; i
>= 0; --i
) {
666 if (s
[i
] && !try_reserve(s
[i
])) {
667 sblog
<< "alu_group_tracker: reinit error on slot " << i
<< "\n";
668 for (unsigned i
= 0; i
< max_slots
; ++i
) {
669 sblog
<< " slot " << i
<< " : ";
675 assert(!"alu_group_tracker: reinit error");
680 void alu_group_tracker::reset(bool keep_packed
) {
684 memset(slots
, 0, sizeof(slots
));
687 produces_lds_oqa
= 0;
688 consumes_lds_oqa
= 0;
693 updates_exec_mask
= false;
694 available_slots
= sh
.get_ctx().has_trans
? 0x1F : 0x0F;
706 void alu_group_tracker::update_flags(alu_node
* n
) {
707 unsigned flags
= n
->bc
.op_ptr
->flags
;
708 has_kill
|= (flags
& AF_KILL
);
709 has_mova
|= (flags
& AF_MOVA
);
710 has_predset
|= (flags
& AF_ANY_PRED
);
711 uses_ar
|= n
->uses_ar();
712 consumes_lds_oqa
|= n
->consumes_lds_oq();
713 produces_lds_oqa
|= n
->produces_lds_oq();
714 if (flags
& AF_ANY_PRED
) {
715 if (n
->dst
[2] != NULL
)
716 updates_exec_mask
= true;
720 int post_scheduler::run() {
721 return run_on(sh
.root
) ? 0 : 1;
724 bool post_scheduler::run_on(container_node
* n
) {
726 for (node_riterator I
= n
->rbegin(), E
= n
->rend(); I
!= E
; ++I
) {
727 if (I
->is_container()) {
728 if (I
->subtype
== NST_BB
) {
729 bb_node
* bb
= static_cast<bb_node
*>(*I
);
732 r
= run_on(static_cast<container_node
*>(*I
));
741 void post_scheduler::init_uc_val(container_node
*c
, value
*v
) {
742 node
*d
= v
->any_def();
743 if (d
&& d
->parent
== c
)
747 void post_scheduler::init_uc_vec(container_node
*c
, vvec
&vv
, bool src
) {
748 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
750 if (!v
|| v
->is_readonly())
754 init_uc_val(c
, v
->rel
);
755 init_uc_vec(c
, v
->muse
, true);
762 unsigned post_scheduler::init_ucm(container_node
*c
, node
*n
) {
763 init_uc_vec(c
, n
->src
, true);
764 init_uc_vec(c
, n
->dst
, false);
766 uc_map::iterator F
= ucm
.find(n
);
767 return F
== ucm
.end() ? 0 : F
->second
;
770 bool post_scheduler::schedule_bb(bb_node
* bb
) {
772 sblog
<< "scheduling BB " << bb
->id
<< "\n";
773 if (!pending
.empty())
774 dump::dump_op_list(&pending
);
777 assert(pending
.empty());
778 assert(bb_pending
.empty());
779 assert(ready
.empty());
781 bb_pending
.append_from(bb
);
786 while ((n
= bb_pending
.back())) {
789 sblog
<< "post_sched_bb ";
794 // May require emitting ALU ops to load index registers
795 if (n
->is_fetch_clause()) {
797 process_fetch(static_cast<container_node
*>(n
));
801 if (n
->is_alu_clause()) {
803 bool r
= process_alu(static_cast<container_node
*>(n
));
817 void post_scheduler::init_regmap() {
822 sblog
<< "init_regmap: live: ";
823 dump::dump_set(sh
, live
);
827 for (val_set::iterator I
= live
.begin(sh
), E
= live
.end(sh
); I
!= E
; ++I
) {
830 if (!v
->is_sgpr() || !v
->is_prealloc())
836 sblog
<< "init_regmap: " << r
<< " <= ";
846 static alu_node
*create_set_idx(shader
&sh
, unsigned ar_idx
) {
847 alu_node
*a
= sh
.create_alu();
849 assert(ar_idx
== V_SQ_CF_INDEX_0
|| ar_idx
== V_SQ_CF_INDEX_1
);
850 if (ar_idx
== V_SQ_CF_INDEX_0
)
851 a
->bc
.set_op(ALU_OP0_SET_CF_IDX0
);
853 a
->bc
.set_op(ALU_OP0_SET_CF_IDX1
);
855 a
->dst
.resize(1); // Dummy needed for recolor
858 sblog
<< "created IDX load: ";
866 void post_scheduler::load_index_register(value
*v
, unsigned ar_idx
)
870 if (!sh
.get_ctx().is_cayman()) {
871 // Evergreen has to first load address register, then use CF_SET_IDX0/1
872 alu_group_tracker
&rt
= alu
.grp();
873 alu_node
*set_idx
= create_set_idx(sh
, ar_idx
);
874 if (!rt
.try_reserve(set_idx
)) {
875 sblog
<< "can't emit SET_CF_IDX";
876 dump::dump_op(set_idx
);
881 if (!alu
.check_clause_limits()) {
882 // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
887 alu_group_tracker
&rt
= alu
.grp();
888 alu_node
*a
= alu
.create_ar_load(v
, ar_idx
== V_SQ_CF_INDEX_1
? SEL_Z
: SEL_Y
);
890 if (!rt
.try_reserve(a
)) {
891 sblog
<< "can't emit AR load : ";
898 if (!alu
.check_clause_limits()) {
899 // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
903 alu
.emit_clause(cur_bb
);
906 void post_scheduler::process_fetch(container_node
*c
) {
910 for (node_iterator N
, I
= c
->begin(), E
= c
->end(); I
!= E
; I
= N
) {
916 fetch_node
*f
= static_cast<fetch_node
*>(n
);
919 sblog
<< "process_tex ";
924 // TODO: If same values used can avoid reloading index register
925 if (f
->bc
.sampler_index_mode
!= V_SQ_CF_INDEX_NONE
||
926 f
->bc
.resource_index_mode
!= V_SQ_CF_INDEX_NONE
) {
927 unsigned index_mode
= f
->bc
.sampler_index_mode
!= V_SQ_CF_INDEX_NONE
?
928 f
->bc
.sampler_index_mode
: f
->bc
.resource_index_mode
;
930 // Currently require prior opt passes to use one TEX per indexed op
931 assert(f
->parent
->count() == 1);
933 value
*v
= f
->src
.back(); // Last src is index offset
936 cur_bb
->push_front(c
);
938 load_index_register(v
, index_mode
);
939 f
->src
.pop_back(); // Don't need index value any more
945 cur_bb
->push_front(c
);
948 bool post_scheduler::process_alu(container_node
*c
) {
956 live
= c
->live_after
;
958 init_globals(c
->live_after
, true);
959 init_globals(c
->live_before
, true);
963 update_local_interferences();
965 for (node_riterator N
, I
= c
->rbegin(), E
= c
->rend(); I
!= E
; I
= N
) {
970 unsigned uc
= init_ucm(c
, n
);
973 sblog
<< "process_alu uc=" << uc
<< " ";
981 pending
.push_back(n
);
982 PSC_DUMP( sblog
<< "pending\n"; );
988 return schedule_alu(c
);
991 void post_scheduler::update_local_interferences() {
994 sblog
<< "update_local_interferences : ";
995 dump::dump_set(sh
, live
);
1000 for (val_set::iterator I
= live
.begin(sh
), E
= live
.end(sh
); I
!= E
; ++I
) {
1002 if (v
->is_prealloc())
1005 v
->interferences
.add_set(live
);
1009 void post_scheduler::update_live_src_vec(vvec
&vv
, val_set
*born
, bool src
) {
1010 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1016 if (src
&& v
->is_any_gpr()) {
1017 if (live
.add_val(v
)) {
1018 if (!v
->is_prealloc()) {
1019 if (!cleared_interf
.contains(v
)) {
1021 sblog
<< "clearing interferences for " << *v
<< "\n";
1023 v
->interferences
.clear();
1024 cleared_interf
.add_val(v
);
1030 } else if (v
->is_rel()) {
1031 if (!v
->rel
->is_any_gpr())
1032 live
.add_val(v
->rel
);
1033 update_live_src_vec(v
->muse
, born
, true);
1038 void post_scheduler::update_live_dst_vec(vvec
&vv
) {
1039 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1045 update_live_dst_vec(v
->mdef
);
1046 } else if (v
->is_any_gpr()) {
1047 if (!live
.remove_val(v
)) {
1049 sblog
<< "failed to remove ";
1051 sblog
<< " from live : ";
1052 dump::dump_set(sh
, live
);
1060 void post_scheduler::update_live(node
*n
, val_set
*born
) {
1061 update_live_dst_vec(n
->dst
);
1062 update_live_src_vec(n
->src
, born
, true);
1063 update_live_src_vec(n
->dst
, born
, false);
1066 void post_scheduler::process_group() {
1067 alu_group_tracker
&rt
= alu
.grp();
1074 sblog
<< "process_group: live_before : ";
1075 dump::dump_set(sh
, live
);
1079 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1080 alu_node
*n
= rt
.slot(s
);
1084 update_live(n
, &vals_born
);
1088 sblog
<< "process_group: live_after : ";
1089 dump::dump_set(sh
, live
);
1093 update_local_interferences();
1095 for (unsigned i
= 0; i
< 5; ++i
) {
1096 node
*n
= rt
.slot(i
);
1097 if (n
&& !n
->is_mova()) {
1098 release_src_values(n
);
1103 void post_scheduler::init_globals(val_set
&s
, bool prealloc
) {
1106 sblog
<< "init_globals: ";
1107 dump::dump_set(sh
, s
);
1111 for (val_set::iterator I
= s
.begin(sh
), E
= s
.end(sh
); I
!= E
; ++I
) {
1113 if (v
->is_sgpr() && !v
->is_global()) {
1116 if (prealloc
&& v
->is_fixed()) {
1123 void post_scheduler::emit_index_registers() {
1124 for (unsigned i
= 0; i
< 2; i
++) {
1125 if (alu
.current_idx
[i
]) {
1126 regmap
= prev_regmap
;
1127 alu
.discard_current_group();
1129 load_index_register(alu
.current_idx
[i
], KC_INDEX_0
+ i
);
1130 alu
.current_idx
[i
] = NULL
;
1135 void post_scheduler::emit_clause() {
1137 if (alu
.current_ar
) {
1140 if (!alu
.check_clause_limits()) {
1141 // Can't happen since clause only contains MOVA/CF_SET_IDX0/1
1146 if (!alu
.is_empty()) {
1147 alu
.emit_clause(cur_bb
);
1150 emit_index_registers();
1153 bool post_scheduler::schedule_alu(container_node
*c
) {
1155 assert(!ready
.empty() || !ready_copies
.empty());
1157 /* This number is rather arbitrary, important is that the scheduler has
1158 * more than one try to create an instruction group
1161 int last_pending
= pending
.count();
1162 while (improving
> 0) {
1163 prev_regmap
= regmap
;
1164 if (!prepare_alu_group()) {
1166 int new_pending
= pending
.count();
1167 if ((new_pending
< last_pending
) || (last_pending
== 0))
1172 last_pending
= new_pending
;
1174 if (alu
.current_idx
[0] || alu
.current_idx
[1]) {
1175 regmap
= prev_regmap
;
1177 init_globals(live
, false);
1182 if (alu
.current_ar
) {
1189 if (!alu
.check_clause_limits()) {
1190 regmap
= prev_regmap
;
1192 init_globals(live
, false);
1201 if (!alu
.is_empty()) {
1205 if (!ready
.empty()) {
1206 sblog
<< "##post_scheduler: unscheduled ready instructions :";
1207 dump::dump_op_list(&ready
);
1208 assert(!"unscheduled ready instructions");
1211 if (!pending
.empty()) {
1212 sblog
<< "##post_scheduler: unscheduled pending instructions :";
1213 dump::dump_op_list(&pending
);
1214 assert(!"unscheduled pending instructions");
1219 void post_scheduler::add_interferences(value
*v
, sb_bitset
&rb
, val_set
&vs
) {
1220 unsigned chan
= v
->gpr
.chan();
1222 for (val_set::iterator I
= vs
.begin(sh
), E
= vs
.end(sh
);
1225 sel_chan gpr
= vi
->get_final_gpr();
1227 if (vi
->is_any_gpr() && gpr
&& vi
!= v
&&
1228 (!v
->chunk
|| v
->chunk
!= vi
->chunk
) &&
1229 vi
->is_fixed() && gpr
.chan() == chan
) {
1231 unsigned r
= gpr
.sel();
1234 sblog
<< "\tadd_interferences: " << *vi
<< "\n";
1244 void post_scheduler::set_color_local_val(value
*v
, sel_chan color
) {
1248 sblog
<< " recolored: ";
1254 void post_scheduler::set_color_local(value
*v
, sel_chan color
) {
1256 vvec
&vv
= v
->chunk
->values
;
1257 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1259 set_color_local_val(v2
, color
);
1263 set_color_local_val(v
, color
);
1268 bool post_scheduler::recolor_local(value
*v
) {
1272 assert(v
->is_sgpr());
1273 assert(!v
->is_prealloc());
1276 unsigned chan
= v
->gpr
.chan();
1279 sblog
<< "recolor_local: ";
1281 sblog
<< " interferences: ";
1282 dump::dump_set(sh
, v
->interferences
);
1285 sblog
<< " in chunk: ";
1286 coalescer::dump_chunk(v
->chunk
);
1292 for (vvec::iterator I
= v
->chunk
->values
.begin(),
1293 E
= v
->chunk
->values
.end(); I
!= E
; ++I
) {
1296 PSC_DUMP( sblog
<< " add_interferences for " << *v2
<< " :\n"; );
1298 add_interferences(v
, rb
, v2
->interferences
);
1301 add_interferences(v
, rb
, v
->interferences
);
1305 unsigned sz
= rb
.size();
1306 sblog
<< "registers bits: " << sz
;
1307 for (unsigned r
= 0; r
< sz
; ++r
) {
1309 sblog
<< "\n " << r
<< " ";
1310 sblog
<< (rb
.get(r
) ? 1 : 0);
1314 bool no_temp_gprs
= v
->is_global();
1315 unsigned rs
, re
, pass
= no_temp_gprs
? 1 : 0;
1320 rs
= sh
.first_temp_gpr();
1324 re
= sh
.num_nontemp_gpr();
1327 for (unsigned reg
= rs
; reg
< re
; ++reg
) {
1328 if (reg
>= rb
.size() || !rb
.get(reg
)) {
1330 set_color_local(v
, sel_chan(reg
, chan
));
1337 assert(!"recolor_local failed");
1341 void post_scheduler::emit_load_ar() {
1343 regmap
= prev_regmap
;
1344 alu
.discard_current_group();
1346 alu_group_tracker
&rt
= alu
.grp();
1347 alu_node
*a
= alu
.create_ar_load(alu
.current_ar
, SEL_X
);
1349 if (!rt
.try_reserve(a
)) {
1350 sblog
<< "can't emit AR load : ";
1358 bool post_scheduler::unmap_dst_val(value
*d
) {
1360 if (d
== alu
.current_ar
) {
1365 if (d
->is_prealloc()) {
1366 sel_chan gpr
= d
->get_final_gpr();
1367 rv_map::iterator F
= regmap
.find(gpr
);
1369 if (F
!= regmap
.end())
1372 if (c
&& c
!=d
&& (!c
->chunk
|| c
->chunk
!= d
->chunk
)) {
1374 sblog
<< "dst value conflict : ";
1376 sblog
<< " regmap contains ";
1380 assert(!"scheduler error");
1389 bool post_scheduler::unmap_dst(alu_node
*n
) {
1390 value
*d
= n
->dst
.empty() ? NULL
: n
->dst
[0];
1396 if (d
&& d
->is_any_reg()) {
1399 if (alu
.current_ar
!= d
) {
1400 sblog
<< "loading wrong ar value\n";
1403 alu
.current_ar
= NULL
;
1406 } else if (d
->is_any_gpr()) {
1407 if (!unmap_dst_val(d
))
1412 for (vvec::iterator I
= d
->mdef
.begin(), E
= d
->mdef
.end();
1418 assert(d
->is_any_gpr());
1420 if (!unmap_dst_val(d
))
1427 bool post_scheduler::map_src_val(value
*v
) {
1429 if (!v
->is_prealloc())
1432 sel_chan gpr
= v
->get_final_gpr();
1433 rv_map::iterator F
= regmap
.find(gpr
);
1435 if (F
!= regmap
.end()) {
1437 if (!v
->v_equal(c
)) {
1439 sblog
<< "can't map src value ";
1441 sblog
<< ", regmap contains ";
1448 regmap
.insert(std::make_pair(gpr
, v
));
1453 bool post_scheduler::map_src_vec(vvec
&vv
, bool src
) {
1455 // Handle possible UBO indexing
1456 bool ubo_indexing
[2] = { false, false };
1457 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1462 if (v
->is_kcache()) {
1463 unsigned index_mode
= v
->select
.kcache_index_mode();
1464 if (index_mode
== KC_INDEX_0
|| index_mode
== KC_INDEX_1
) {
1465 ubo_indexing
[index_mode
- KC_INDEX_0
] = true;
1470 // idx values stored at end of src vec, see bc_parser::prepare_alu_group
1471 for (unsigned i
= 2; i
!= 0; i
--) {
1472 if (ubo_indexing
[i
-1]) {
1473 // TODO: skip adding value to kcache reservation somehow, causes
1474 // unnecessary group breaks and cache line locks
1475 value
*v
= vv
.back();
1476 if (alu
.current_idx
[i
-1] && alu
.current_idx
[i
-1] != v
) {
1478 sblog
<< "IDX" << i
-1 << " already set to " <<
1479 *alu
.current_idx
[i
-1] << ", trying to set " << *v
<< "\n";
1484 alu
.current_idx
[i
-1] = v
;
1485 PSC_DUMP(sblog
<< "IDX" << i
-1 << " set to " << *v
<< "\n";);
1490 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1495 if ((!v
->is_any_gpr() || !v
->is_fixed()) && !v
->is_rel())
1499 value
*rel
= v
->rel
;
1502 if (!rel
->is_const()) {
1503 if (!map_src_vec(v
->muse
, true))
1506 if (rel
!= alu
.current_ar
) {
1507 if (alu
.current_ar
) {
1509 sblog
<< " current_AR is " << *alu
.current_ar
1510 << " trying to use " << *rel
<< "\n";
1515 alu
.current_ar
= rel
;
1518 sblog
<< " new current_AR assigned: " << *alu
.current_ar
1525 if (!map_src_val(v
)) {
1533 bool post_scheduler::map_src(alu_node
*n
) {
1534 if (!map_src_vec(n
->dst
, false))
1537 if (!map_src_vec(n
->src
, true))
1543 void post_scheduler::dump_regmap() {
1545 sblog
<< "# REGMAP :\n";
1547 for(rv_map::iterator I
= regmap
.begin(), E
= regmap
.end(); I
!= E
; ++I
) {
1548 sblog
<< " # " << I
->first
<< " => " << *(I
->second
) << "\n";
1552 sblog
<< " current_AR: " << *alu
.current_ar
<< "\n";
1554 sblog
<< " current_PR: " << *alu
.current_pr
<< "\n";
1555 if (alu
.current_idx
[0])
1556 sblog
<< " current IDX0: " << *alu
.current_idx
[0] << "\n";
1557 if (alu
.current_idx
[1])
1558 sblog
<< " current IDX1: " << *alu
.current_idx
[1] << "\n";
1561 void post_scheduler::recolor_locals() {
1562 alu_group_tracker
&rt
= alu
.grp();
1564 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1565 alu_node
*n
= rt
.slot(s
);
1567 value
*d
= n
->dst
[0];
1568 if (d
&& d
->is_sgpr() && !d
->is_prealloc()) {
1575 // returns true if there are interferences
1576 bool post_scheduler::check_interferences() {
1578 alu_group_tracker
&rt
= alu
.grp();
1580 unsigned interf_slots
;
1582 bool discarded
= false;
1585 sblog
<< "check_interferences: before: \n";
1593 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1594 alu_node
*n
= rt
.slot(s
);
1596 if (!unmap_dst(n
)) {
1602 for (unsigned s
= 0; s
< ctx
.num_slots
; ++s
) {
1603 alu_node
*n
= rt
.slot(s
);
1606 interf_slots
|= (1 << s
);
1612 for (unsigned i
= 0; i
< 5; ++i
) {
1613 if (interf_slots
& (1 << i
)) {
1614 sblog
<< "!!!!!! interf slot: " << i
<< " : ";
1615 dump::dump_op(rt
.slot(i
));
1624 PSC_DUMP( sblog
<< "ci: discarding slots " << interf_slots
<< "\n"; );
1626 rt
.discard_slots(interf_slots
, alu
.conflict_nodes
);
1627 regmap
= prev_regmap
;
1633 sblog
<< "check_interferences: after: \n";
1640 // add instruction(s) (alu_node or contents of alu_packed_node) to current group
1641 // returns the number of added instructions on success
1642 unsigned post_scheduler::try_add_instruction(node
*n
) {
1644 alu_group_tracker
&rt
= alu
.grp();
1646 unsigned avail_slots
= rt
.avail_slots();
1648 // Cannot schedule in same clause as instructions using this index value
1649 if (!n
->dst
.empty() && n
->dst
[0] &&
1650 (n
->dst
[0] == alu
.current_idx
[0] || n
->dst
[0] == alu
.current_idx
[1])) {
1651 PSC_DUMP(sblog
<< " CF_IDX source: " << *n
->dst
[0] << "\n";);
1655 if (n
->is_alu_packed()) {
1656 alu_packed_node
*p
= static_cast<alu_packed_node
*>(n
);
1657 unsigned slots
= p
->get_slot_mask();
1658 unsigned cnt
= __builtin_popcount(slots
);
1660 if ((slots
& avail_slots
) != slots
) {
1661 PSC_DUMP( sblog
<< " no slots \n"; );
1665 p
->update_packed_items(ctx
);
1667 if (!rt
.try_reserve(p
)) {
1668 PSC_DUMP( sblog
<< " reservation failed \n"; );
1676 alu_node
*a
= static_cast<alu_node
*>(n
);
1677 value
*d
= a
->dst
.empty() ? NULL
: a
->dst
[0];
1679 if (d
&& d
->is_special_reg()) {
1680 assert((a
->bc
.op_ptr
->flags
& AF_MOVA
) || d
->is_geometry_emit() || d
->is_lds_oq() || d
->is_lds_access() || d
->is_scratch());
1684 unsigned allowed_slots
= ctx
.alu_slots_mask(a
->bc
.op_ptr
);
1687 allowed_slots
&= avail_slots
;
1693 slot
= d
->get_final_chan();
1694 a
->bc
.dst_chan
= slot
;
1695 allowed_slots
&= (1 << slot
) | 0x10;
1697 if (a
->bc
.op_ptr
->flags
& AF_MOVA
) {
1698 if (a
->bc
.slot_flags
& AF_V
)
1699 allowed_slots
&= (1 << SLOT_X
);
1701 allowed_slots
&= (1 << SLOT_TRANS
);
1705 // FIXME workaround for some problems with MULADD in trans slot on r700,
1706 // (is it really needed on r600?)
1707 if ((a
->bc
.op
== ALU_OP3_MULADD
|| a
->bc
.op
== ALU_OP3_MULADD_IEEE
) &&
1709 allowed_slots
&= 0x0F;
1712 if (!allowed_slots
) {
1713 PSC_DUMP( sblog
<< " no suitable slots\n"; );
1717 slot
= __builtin_ctz(allowed_slots
);
1720 PSC_DUMP( sblog
<< "slot: " << slot
<< "\n"; );
1722 if (!rt
.try_reserve(a
)) {
1723 PSC_DUMP( sblog
<< " reservation failed\n"; );
1732 bool post_scheduler::check_copy(node
*n
) {
1733 if (!n
->is_copy_mov())
1736 value
*s
= n
->src
[0];
1737 value
*d
= n
->dst
[0];
1739 if (!s
->is_sgpr() || !d
->is_sgpr())
1742 if (!s
->is_prealloc()) {
1745 if (!s
->chunk
|| s
->chunk
!= d
->chunk
)
1749 if (s
->gpr
== d
->gpr
) {
1752 sblog
<< "check_copy: ";
1757 rv_map::iterator F
= regmap
.find(d
->gpr
);
1758 bool gpr_free
= (F
== regmap
.end());
1760 if (d
->is_prealloc()) {
1762 PSC_DUMP( sblog
<< " copy not ready...\n";);
1766 value
*rv
= F
->second
;
1767 if (rv
!= d
&& (!rv
->chunk
|| rv
->chunk
!= d
->chunk
)) {
1768 PSC_DUMP( sblog
<< " copy not ready(2)...\n";);
1772 unmap_dst(static_cast<alu_node
*>(n
));
1775 if (s
->is_prealloc() && !map_src_val(s
))
1778 update_live(n
, NULL
);
1780 release_src_values(n
);
1782 PSC_DUMP( sblog
<< " copy coalesced...\n";);
1788 void post_scheduler::dump_group(alu_group_tracker
&rt
) {
1789 for (unsigned i
= 0; i
< 5; ++i
) {
1790 node
*n
= rt
.slot(i
);
1792 sblog
<< "slot " << i
<< " : ";
1799 void post_scheduler::process_ready_copies() {
1804 last
= ready_copies
.back();
1806 for (node_iterator N
, I
= ready_copies
.begin(), E
= ready_copies
.end();
1812 if (!check_copy(n
)) {
1817 } while (last
!= ready_copies
.back());
1819 update_local_interferences();
// prepare_alu_group: fill the current ALU group tracker with ready
// instructions.  Conflicting nodes from a previous attempt are merged
// back into the ready list, ready copies are processed first, then ready
// instructions are added until all hardware slots are used, an
// interference check fails, or the packing heuristics below cut off.
// Returns the group's instruction count (nonzero => a group was formed).
// NOTE(review): large parts of the loop body are missing from this
// extract; the numbering gaps (1826-1829, 1845-1855, ...) mark dropped
// lines, including the declarations of 'i1' and 'prev_regmap'.
1823 bool post_scheduler::prepare_alu_group() {
1825 alu_group_tracker
&rt
= alu
.grp();
1830 sblog
<< "prepare_alu_group: starting...\n";
// Retry nodes that conflicted with the previously formed group.
1834 ready
.append_from(&alu
.conflict_nodes
);
1836 // FIXME rework this loop
1840 process_ready_copies();
1844 for (node_iterator N
, I
= ready
.begin(), E
= ready
.end(); I
!= E
;
1856 unsigned cnt
= try_add_instruction(n
);
1862 sblog
<< "current group:\n";
// Stop as soon as every hardware slot of the group is occupied.
1866 if (rt
.inst_count() == ctx
.num_slots
) {
1867 PSC_DUMP( sblog
<< " all slots used\n"; );
1872 if (!check_interferences())
1875 // don't try to add more instructions to the group with mova if this
1876 // can lead to breaking clause slot count limit - we don't want mova to
1877 // end up in the end of the new clause instead of beginning of the
1879 if (rt
.has_ar_load() && alu
.total_slots() > 121)
// Heuristic cutoff: stop extending a non-empty group after many tries.
1882 if (rt
.inst_count() && i1
> 50)
// Roll the register map back to its state before the failed attempt.
1885 regmap
= prev_regmap
;
1890 sblog
<< " prepare_alu_group done, " << rt
.inst_count()
1893 sblog
<< "$$$$$$$$PAG i1=" << i1
1894 << " ready " << ready
.count()
1895 << " pending " << pending
.count()
1896 << " conflicting " << alu
.conflict_nodes
.count()
1901 return rt
.inst_count();
1904 void post_scheduler::release_src_values(node
* n
) {
1905 release_src_vec(n
->src
, true);
1906 release_src_vec(n
->dst
, false);
// release_op: route node 'n' onto the appropriate scheduling queue --
// copy MOVs go to ready_copies, MOVA / predicate-set ops to the front of
// the ready list (prioritized -- confirm rationale in the full file).
// NOTE(review): the dump body, the n->remove() call and the fall-through
// branch are missing from this extract.
1909 void post_scheduler::release_op(node
*n
) {
1911 sblog
<< "release_op ";
1918 if (n
->is_copy_mov()) {
1919 ready_copies
.push_back(n
);
1920 } else if (n
->is_mova() || n
->is_pred_set()) {
// Front of the ready queue: these ops are picked up first.
1921 ready
.push_front(n
);
// release_src_val: fetch the defining node of value 'v'.
// NOTE(review): the remainder of the function (use-count bookkeeping and
// releasing the definition once its uses drain) is missing from this
// extract -- consult the full file.
1927 void post_scheduler::release_src_val(value
*v
) {
1928 node
*d
= v
->any_def();
// release_src_vec: walk a value vector and release each entry; 'src'
// tells whether the entries are real source uses.  Null and readonly
// entries are skipped; each value's relative-address value (rel) and its
// 'muse' vector are released recursively.
// NOTE(review): some loop-body lines (the '*I' dereference into 'v', the
// skip 'continue', the release_src_val(v) call) are missing here.
1935 void post_scheduler::release_src_vec(vvec
& vv
, bool src
) {
1937 for (vvec::iterator I
= vv
.begin(), E
= vv
.end(); I
!= E
; ++I
) {
1939 if (!v
|| v
->is_readonly())
1943 release_src_val(v
->rel
);
1944 release_src_vec(v
->muse
, true);
1952 void literal_tracker::reset() {
1953 memset(lt
, 0, sizeof(lt
));
1954 memset(uc
, 0, sizeof(uc
));
1957 void rp_gpr_tracker::reset() {
1958 memset(rp
, 0, sizeof(rp
));
1959 memset(uc
, 0, sizeof(uc
));
1962 void rp_kcache_tracker::reset() {
1963 memset(rp
, 0, sizeof(rp
));
1964 memset(uc
, 0, sizeof(uc
));
// reset: clear the clause kcache bank assignments (kc).
// NOTE(review): original lines 1969-1971 (numbering gap) are missing from
// this extract -- likely additional state is reset there as well.
1967 void alu_kcache_tracker::reset() {
1968 memset(kc
, 0, sizeof(kc
));
// reset: reinitialize per-clause tracking state.  Only the reset of the
// outstanding LDS output-queue read counter is visible in this extract
// (original lines 1973-1974 and 1976+ are missing).
1972 void alu_clause_tracker::reset() {
1975 outstanding_lds_oqa_reads
= 0;
// Constructor: the kcache tracker is initialized from the shader
// context's hardware class; counters and the current AR / PR /
// index-register values start zero-initialized.
// NOTE(review): initializer-list entries on original lines 1982-1983 are
// missing from this extract.
1980 alu_clause_tracker::alu_clause_tracker(shader
&sh
)
1981 : sh(sh
), kt(sh
.get_ctx().hw_class
), slot_count(),
1984 push_exec_mask(), outstanding_lds_oqa_reads(),
1985 current_ar(), current_pr(), current_idx() {}
// emit_group: emit the currently built ALU group into the clause,
// creating the clause node on demand, and update the clause-level
// counters (outstanding LDS output-queue reads, total slot count).
// NOTE(review): several original lines (guards, group switch) are missing
// from this extract.
1987 void alu_clause_tracker::emit_group() {
// An empty group must never be emitted.
1989 assert(grp().inst_count());
1991 alu_group_node
*g
= grp().emit();
// The asserts show an exec-mask update may happen only once per clause.
1993 if (grp().has_update_exec_mask()) {
1994 assert(!push_exec_mask
);
1995 push_exec_mask
= true;
// Lazily create the ALU clause container node.
2001 clause
= sh
.create_clause(NST_ALU_CLAUSE
);
// push_front: each emitted group is prepended to the clause.
2004 clause
->push_front(g
);
2006 outstanding_lds_oqa_reads
+= grp().get_consumes_lds_oqa();
2007 outstanding_lds_oqa_reads
-= grp().get_produces_lds_oqa();
2008 slot_count
+= grp().slot_count();
2012 PSC_DUMP( sblog
<< " #### group emitted\n"; );
// emit_clause: finalize the current clause and prepend it to container
// 'c'.  Kcache settings are written into the clause bytecode; when an
// exec-mask update was emitted the CF opcode becomes ALU_PUSH_BEFORE.
// Sanity asserts: no outstanding LDS output-queue reads and no live
// AR/PR value may remain at clause end.
// NOTE(review): the guard conditions around these statements are missing
// from this extract.
2015 void alu_clause_tracker::emit_clause(container_node
*c
) {
2018 kt
.init_clause(clause
->bc
);
2020 assert(!outstanding_lds_oqa_reads
);
2021 assert(!current_ar
);
2022 assert(!current_pr
);
2025 clause
->bc
.set_op(CF_OP_ALU_PUSH_BEFORE
);
2027 c
->push_front(clause
);
// Clear the per-clause exec-mask flag for the next clause.
2030 push_exec_mask
= false;
2034 PSC_DUMP( sblog
<< "######### ALU clause emitted\n"; );
// check_clause_limits: verify that the current group still fits into the
// clause.  Slots are reserved for reloading the AR and PR values and the
// two CF index registers, plus a 60-slot safety margin when the group
// consumes LDS output-queue data with no reads outstanding; the kcache
// constraints must also be satisfiable.
// NOTE(review): the 'return' statements are missing from this extract.
2037 bool alu_clause_tracker::check_clause_limits() {
// NOTE(review): the next line is garbled by extraction; judging from the
// later uses of 'gt' it declares "alu_group_tracker &gt = grp();".
2039 alu_group_tracker
>
= grp();
2041 unsigned slots
= gt
.slot_count();
2043 // reserving slots to load AR and PR values
2044 unsigned reserve_slots
= (current_ar
? 1 : 0) + (current_pr
? 1 : 0);
2045 // ...and index registers
2046 reserve_slots
+= (current_idx
[0] != NULL
) + (current_idx
[1] != NULL
);
// Large margin when LDS queue reads would have to stay in this clause.
2048 if (gt
.get_consumes_lds_oqa() && !outstanding_lds_oqa_reads
)
2049 reserve_slots
+= 60;
// Group would overflow the clause slot budget minus reservations.
2051 if (slot_count
+ slots
> MAX_ALU_SLOTS
- reserve_slots
)
// Group's kcache lines must fit into the clause's kcache sets.
2054 if (!kt
.try_reserve(gt
))
// new_group: begin tracking a fresh ALU group.
// NOTE(review): the body (original lines 2061+) is missing from this
// extract -- consult the full file.
2060 void alu_clause_tracker::new_group() {
2065 bool alu_clause_tracker::is_empty() {
2066 return clause
== NULL
;
// init_group_literals: copy the tracked literal slots into the group
// node's literal list (clearing it first), dumping each literal in
// float / zero-padded-hex / int form when PSC_DUMP is active.
// NOTE(review): guard lines inside the loop (numbering gaps 2073-2075,
// 2077-2078) are missing from this extract.
2069 void literal_tracker::init_group_literals(alu_group_node
* g
) {
2071 g
->literals
.clear();
// At most 4 literal slots per group.
2072 for (unsigned i
= 0; i
< 4; ++i
) {
2076 g
->literals
.push_back(lt
[i
]);
2079 sblog
<< "literal emitted: " << lt
[i
].f
;
2080 sblog
.print_zw_hex(lt
[i
].u
, 8);
2081 sblog
<< " " << lt
[i
].i
<< "\n";
// try_reserve: check whether the kcache lines used by group 'gt' fit
// into the clause's line set.  The group's lines are merged into
// 'lines'; if the merged set did not grow, every needed line was already
// reserved.  On failure the previous line set is restored from the
// snapshot.
// NOTE(review): the statements between these lines (bank-fit check /
// returns) are missing from this extract.
2086 bool alu_kcache_tracker::try_reserve(alu_group_tracker
& gt
) {
2087 rp_kcache_tracker
&kt
= gt
.kcache();
2092 sb_set
<unsigned> group_lines
;
2094 unsigned nl
= kt
.get_lines(group_lines
);
// Snapshot the clause lines so they can be rolled back on failure.
2097 sb_set
<unsigned> clause_lines(lines
);
2098 lines
.add_set(group_lines
);
// Unchanged size => the merge added nothing new.
2100 if (clause_lines
.size() == lines
.size())
// Rollback to the pre-merge line set.
2106 lines
= clause_lines
;
// get_lines: convert the reserved kcache entries (rp[]) into line ids
// and insert them into 'lines'.  Each rp entry packs an index mode in
// its top 3 bits (>> 29) with the const position below; the position is
// turned into a line number (shift by 5 when sel_count == 2, else by 6)
// and the index mode is re-packed into the top bits of the line id.
// NOTE(review): the zero-entry skip, the insertion counter and the
// return are missing from this extract.
2111 unsigned rp_kcache_tracker::get_lines(kc_lines
& lines
) {
2114 for (unsigned i
= 0; i
< sel_count
; ++i
) {
// Split the packed entry: low 29 bits = position, high 3 = index mode.
2115 unsigned line
= rp
[i
] & 0x1fffffffu
;
2116 unsigned index_mode
= rp
[i
] >> 29;
// Position -> line number; granularity depends on sel_count.
2122 line
= (sel_count
== 2) ? line
>> 5 : line
>> 6;
2123 line
|= index_mode
<< 29;
// set insert().second is true only for lines not seen before.
2125 if (lines
.insert(line
).second
)
// update_kc: rebuild the kcache bank assignments (kc[]) from the
// clause's line set.  The current assignment is saved first so it can be
// restored when the lines do not fit.  A line that directly follows the
// previous entry's line in the same bank with the same index mode is
// merged into that entry as KC_LOCK_2 (two-line lock); otherwise a new
// KC_LOCK_1 entry is used.
// NOTE(review): many lines (the declaration of counter 'c', the
// capacity check, the loop epilogue and returns) are missing from this
// extract.
2131 bool alu_kcache_tracker::update_kc() {
2134 bc_kcache old_kc
[4];
// Snapshot for rollback on failure.
2135 memcpy(old_kc
, kc
, sizeof(kc
));
2137 for (kc_lines::iterator I
= lines
.begin(), E
= lines
.end(); I
!= E
; ++I
) {
// Unpack: top 3 bits = index mode, low 29 bits = line, line >> 8 = bank.
2138 unsigned index_mode
= *I
>> 29;
2139 unsigned line
= *I
& 0x1fffffffu
;
2140 unsigned bank
= line
>> 8;
2142 assert(index_mode
<= KC_INDEX_INVALID
);
// Adjacent line, same bank, same index mode => widen previous entry.
2145 if (c
&& (bank
== kc
[c
-1].bank
) && (kc
[c
-1].addr
+ 1 == line
) &&
2146 kc
[c
-1].index_mode
== index_mode
)
2148 kc
[c
-1].mode
= KC_LOCK_2
;
// Rollback the assignment built so far.
2151 memcpy(kc
, old_kc
, sizeof(kc
));
// Start a new single-line lock entry.
2155 kc
[c
].mode
= KC_LOCK_1
;
2159 kc
[c
].index_mode
= index_mode
;
// create_ar_load: build the MOVA instruction that loads value 'v' for
// address-register use on channel 'ar_channel'.  When the context
// requires the GPR variant, MOVA_GPR_INT is used on the trans slot;
// otherwise plain MOVA_INT on slot X.  On Cayman, non-X channels target
// the CF_IDX0 / CF_IDX1 destinations (see eg_sq.h note in the header).
// NOTE(review): some lines (else-branch braces, further bc setup, the
// return statement) are missing from this extract.
2166 alu_node
* alu_clause_tracker::create_ar_load(value
*v
, chan_select ar_channel
) {
2167 alu_node
*a
= sh
.create_alu();
2169 if (sh
.get_ctx().uses_mova_gpr
) {
2170 a
->bc
.set_op(ALU_OP1_MOVA_GPR_INT
);
2171 a
->bc
.slot
= SLOT_TRANS
;
2173 a
->bc
.set_op(ALU_OP1_MOVA_INT
);
2174 a
->bc
.slot
= SLOT_X
;
2176 a
->bc
.dst_chan
= ar_channel
;
// Cayman: Y/Z channels map to the CF index register destinations.
2177 if (ar_channel
!= SEL_X
&& sh
.get_ctx().is_cayman()) {
2178 a
->bc
.dst_gpr
= ar_channel
== SEL_Y
? CM_V_SQ_MOVA_DST_CF_IDX0
: CM_V_SQ_MOVA_DST_CF_IDX1
;
2182 a
->src
.push_back(v
);
2185 sblog
<< "created AR load: ";
2193 void alu_clause_tracker::discard_current_group() {
2194 PSC_DUMP( sblog
<< "act::discard_current_group\n"; );
2195 grp().discard_all_slots(conflict_nodes
);
// dump: debug print of the GPR read-port tracker -- for each of the 3
// read cycles, print the rp[c][h] reservation and uc[c][h] use count of
// all 4 entries.
// NOTE(review): the loop-closing lines (and the per-cycle newline, if
// any) are missing from this extract.
2198 void rp_gpr_tracker::dump() {
2199 sblog
<< "=== gpr_tracker dump:\n";
2200 for (int c
= 0; c
< 3; ++c
) {
2201 sblog
<< "cycle " << c
<< " ";
2202 for (int h
= 0; h
< 4; ++h
) {
2203 sblog
<< rp
[c
][h
] << ":" << uc
[c
][h
] << " ";
2209 } // namespace r600_sb