2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
// Map keyed by node pointer, holding one unsigned value per node.
// NOTE(review): presumably a per-node use count ("uc"), cf. init_ucm() in
// post_scheduler — confirm at the use sites.
32 typedef sb_map
<node
*, unsigned> uc_map
;
34 // resource trackers for scheduler
// Set of unsigned values; passed by reference to
// rp_kcache_tracker::get_lines().
36 typedef sb_set
<unsigned> kc_lines
;
// Scheduler resource tracker for kcache selectors.
// Reservations can be made and released either for a whole node or for a
// single sel_chan; the reservation policy itself is defined out of line.
38 class rp_kcache_tracker
{
// Fixed at construction; kc_sel() treats the value 4 specially.
41 const unsigned sel_count
;
// Converts a sel_chan into the selector id used for tracking.
// With sel_count == 4 the value is used unchanged; otherwise consecutive
// pairs of values share one id (1,2 -> 1; 3,4 -> 2; ...).
43 unsigned kc_sel(sel_chan r
) {
44 return sel_count
== 4 ? (unsigned)r
: ((r
- 1) >> 1) + 1;
// Constructed from the shader being processed.
48 rp_kcache_tracker(shader
&sh
);
// Node-level reserve / release. try_reserve() reports success as bool.
50 bool try_reserve(node
*n
);
51 void unreserve(node
*n
);
// Single-selector reserve / release.
54 bool try_reserve(sel_chan r
);
55 void unreserve(sel_chan r
);
// Number of non-zero entries among rp[0] .. rp[3].
59 unsigned num_sels() { return !!rp
[0] + !!rp
[1] + !!rp
[2] + !!rp
[3]; }
// Takes the kc_lines set by non-const reference.
// NOTE(review): presumably fills 'lines' and returns how many were
// produced — confirm against the definition.
61 unsigned get_lines(kc_lines
&lines
);
// Scheduler resource tracker for literal values.
// Reservations can be made and released either for an ALU node or for a
// single literal.
64 class literal_tracker
{
// Value-initializes the lt and uc members.
68 literal_tracker() : lt(), uc() {}
// ALU-node-level reserve / release. try_reserve() reports success as bool.
70 bool try_reserve(alu_node
*n
);
71 void unreserve(alu_node
*n
);
// Single-literal reserve / release.
73 bool try_reserve(literal l
);
74 void unreserve(literal l
);
// Number of non-zero entries among uc[0] .. uc[3].
// NOTE(review): uc presumably holds per-literal use counts — confirm.
78 unsigned count() { return !!uc
[0] + !!uc
[1] + !!uc
[2] + !!uc
[3]; }
// Operates on an ALU group node; defined out of line.
80 void init_group_literals(alu_group_node
*g
);
// Scheduler resource tracker for GPR reads.
// Reservations can be made and released either for an ALU node or for an
// explicit (cycle, sel, chan) triple.
84 class rp_gpr_tracker
{
// Value-initializes the rp and uc members.
90 rp_gpr_tracker() : rp(), uc() {}
// ALU-node-level reserve / release. try_reserve() reports success as bool.
92 bool try_reserve(alu_node
*n
);
93 void unreserve(alu_node
*n
);
// Reserve / release a single (cycle, sel, chan) entry.
95 bool try_reserve(unsigned cycle
, unsigned sel
, unsigned chan
);
96 void unreserve(unsigned cycle
, unsigned sel
, unsigned chan
);
// Tracks the contents and resource usage of one ALU instruction group while
// it is being assembled: slot occupancy, kcache reservations, literals and
// per-group flags.
103 class alu_group_tracker
{
// kcache reservations for this group; exposed through kcache().
107 rp_kcache_tracker kc
;
// Bitmask of slots that are still free; see used_slots() for the inverse.
113 unsigned available_slots
;
// Map from value pointer to an unsigned index.
117 typedef std::map
<value
*, unsigned> value_index_map
;
119 value_index_map vmap
;
// Reported by has_update_exec_mask().
125 bool updates_exec_mask
;
// One counter per channel (4 entries).
127 unsigned chan_count
[4];
129 // param index + 1 (0 means that group doesn't refer to Params)
130 // we can't use more than one param index in a group
131 unsigned interp_param
;
// Places node n into the given slot; defined out of line.
137 void assign_slot(unsigned slot
, alu_node
*n
);
// Constructed from the shader being processed.
140 alu_group_tracker(shader
&sh
);
142 // FIXME use fast bs correctness check (values for same chan <= 3) ??
// Try to add a single ALU node / a packed node to the group; the bool
// result reports success.
143 bool try_reserve(alu_node
*n
);
144 bool try_reserve(alu_packed_node
*p
);
// Resets the tracker. keep_packed defaults to false.
// NOTE(review): presumably keep_packed preserves packed-instruction state —
// confirm against the definition.
147 void reset(bool keep_packed
= false);
// Returns the sel_chan id associated with value v; defined out of line.
149 sel_chan
get_value_id(value
*v
);
// Updates the group's flags for node n; defined out of line.
150 void update_flags(alu_node
*n
);
// Returns the node stored in slot i (no bounds check here).
152 alu_node
* slot(unsigned i
) { return slots
[i
]; }
// Bitmask of occupied slots: the complement of available_slots restricted
// to the low max_slots bits.
154 unsigned used_slots() {
155 return (~available_slots
) & ((1 << max_slots
) - 1);
// Number of occupied slots, i.e. the number of bits set in used_slots().
158 unsigned inst_count() {
159 return __builtin_popcount(used_slots());
// Number of literals reserved by this group (delegates to lt.count()).
162 unsigned literal_count() { return lt
.count(); }
// literal_count() divided by two, rounded up.
163 unsigned literal_slot_count() { return (literal_count() + 1) >> 1; }
// Instruction slots plus literal slots.
164 unsigned slot_count() { return inst_count() + literal_slot_count(); }
// Produces an ALU group node; defined out of line.
166 alu_group_node
* emit();
// Access to this group's kcache tracker.
168 rp_kcache_tracker
& kcache() { return kc
; }
// Plain accessors for updates_exec_mask and available_slots.
170 bool has_update_exec_mask() { return updates_exec_mask
; }
171 unsigned avail_slots() { return available_slots
; }
// Discard all slots / the slots selected by slot_mask. Both take a
// container_node by non-const reference.
// NOTE(review): presumably the discarded nodes are collected in
// removed_nodes — confirm against the definitions.
173 void discard_all_slots(container_node
&removed_nodes
);
174 void discard_slots(unsigned slot_mask
, container_node
&removed_nodes
);
// Returns the has_mova flag.
176 bool has_ar_load() { return has_mova
; }
// Tracks kcache usage for an ALU clause: the set of referenced lines and
// the kc array that init_clause() copies into the clause bytecode.
179 class alu_kcache_tracker
{
// Set of unsigned line identifiers.
181 sb_set
<unsigned> lines
;
// max_kcs is 4 when hc >= HW_CLASS_EVERGREEN, otherwise 2; kc and lines
// are value-initialized.
186 alu_kcache_tracker(sb_hw_class hc
)
187 : kc(), lines(), max_kcs(hc
>= HW_CLASS_EVERGREEN
? 4 : 2) {}
// Tries to reserve kcache resources for the given group tracker; the bool
// result reports success.
190 bool try_reserve(alu_group_tracker
&gt
);
// Copies this tracker's kc array byte-for-byte into bc.kc
// (sizeof(kc) bytes).
192 void init_clause(bc_cf
&bc
) {
193 memcpy(bc
.kc
, kc
, sizeof(kc
));
// Tracks the state of the ALU clause being built: clause-level kcache
// usage and two group trackers, one of which is selected as current.
197 class alu_clause_tracker
{
// Clause-level kcache tracker.
200 alu_kcache_tracker kt
;
// The two group trackers; grp() / prev_grp() choose between them
// according to 'group'.
203 alu_group_tracker grp0
;
204 alu_group_tracker grp1
;
// Container of nodes.
// NOTE(review): presumably nodes that conflicted during scheduling —
// confirm at the use sites.
213 container_node conflict_nodes
;
215 // current values of AR and PR registers that we have to preload
216 // till the end of clause (in fact, beginning, because we're scheduling
// Constructed from the shader being processed.
221 alu_clause_tracker(shader
&sh
);
// Current group tracker: grp1 when 'group' is non-zero, otherwise grp0.
226 alu_group_tracker
& grp() { return group
? grp1
: grp0
; }
// The other group tracker: grp0 when 'group' is non-zero, otherwise grp1.
228 alu_group_tracker
& prev_grp() { return group
? grp0
: grp1
; }
// Operates on container node c; defined out of line.
231 void emit_clause(container_node
*c
);
// Returns a bool verdict on the clause limits; defined out of line.
232 bool check_clause_limits();
// Returns an ALU node; defined out of line.
236 alu_node
* create_ar_load();
238 void discard_current_group();
// Returns the slot_count member.
240 unsigned total_slots() { return slot_count
; }
// Scheduling pass (derives from pass). Holds node containers, the live
// value set, an alu_clause_tracker and sel_chan -> value register maps.
// All member functions below are defined out of line.
243 class post_scheduler
: public pass
{
// Node containers used while scheduling.
245 container_node ready
, ready_copies
; // alu only
246 container_node pending
, bb_pending
;
248 val_set live
; // values live at the end of the alu clause
// Tracker for the ALU clause being built.
250 alu_clause_tracker alu
;
// Map from sel_chan to value pointer; two instances are kept
// (regmap and prev_regmap).
252 typedef std::map
<sel_chan
, value
*> rv_map
;
253 rv_map regmap
, prev_regmap
;
// Set of values.
// NOTE(review): presumably values whose interference info was cleared —
// confirm at the use sites.
255 val_set cleared_interf
;
// Passes sh to the pass base and to the alu tracker; the other listed
// members are value-initialized. bb_pending and prev_regmap are not listed
// and are therefore default-initialized.
259 post_scheduler(shader
&sh
) : pass(sh
),
260 ready(), ready_copies(), pending(), cur_bb(),
261 live(), ucm(), alu(sh
), regmap(), cleared_interf() {}
// Entry points over a container node / a basic block.
264 void run_on(container_node
*n
);
265 void schedule_bb(bb_node
*bb
);
// ALU clause handling. prepare_alu_group() returns a bool verdict.
267 void process_alu(container_node
*c
);
268 void schedule_alu(container_node
*c
);
269 bool prepare_alu_group();
// Release helpers for a node, its source values, a value vector and a
// single value.
271 void release_op(node
*n
);
273 void release_src_values(node
*n
);
274 void release_src_vec(vvec
&vv
, bool src
);
275 void release_src_val(value
*v
);
// Initialization helpers taking the container plus a value, a value
// vector or a node.
// NOTE(review): "uc"/"ucm" presumably refer to use counts (see uc_map) —
// confirm.
277 void init_uc_val(container_node
*c
, value
*v
);
278 void init_uc_vec(container_node
*c
, vvec
&vv
, bool src
);
279 unsigned init_ucm(container_node
*c
, node
*n
);
283 bool check_interferences();
// Returns unsigned; the meaning of the result is defined by the
// implementation.
285 unsigned try_add_instruction(node
*n
);
287 bool check_copy(node
*n
);
288 void dump_group(alu_group_tracker
&rt
);
// Destination unmapping for an ALU node / a single value; bool result.
290 bool unmap_dst(alu_node
*n
);
291 bool unmap_dst_val(value
*d
);
// Source mapping for an ALU node, a value vector and a single value;
// bool result.
293 bool map_src(alu_node
*n
);
294 bool map_src_vec(vvec
&vv
, bool src
);
295 bool map_src_val(value
*v
);
297 bool recolor_local(value
*v
);
// Liveness / interference maintenance.
299 void update_local_interferences();
300 void update_live_src_vec(vvec
&vv
, val_set
*born
, bool src
);
301 void update_live_dst_vec(vvec
&vv
);
302 void update_live(node
*n
, val_set
*born
);
303 void process_group();
// Assign a sel_chan color to a value.
305 void set_color_local_val(value
*v
, sel_chan color
);
306 void set_color_local(value
*v
, sel_chan color
);
// Takes the bitset and the value set by non-const reference.
308 void add_interferences(value
*v
, sb_bitset
&rb
, val_set
&vs
);
310 void init_globals(val_set
&s
, bool prealloc
);
312 void recolor_locals();
319 void process_ready_copies();
322 } // namespace r600_sb
324 #endif /* SB_SCHED_H_ */