2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
// uc_map: sb_map from a node pointer to an unsigned value.
// NOTE(review): "uc" presumably stands for "use count" - confirm against users.
32 typedef sb_map
<node
*, unsigned> uc_map
;
34 // resource trackers for scheduler
// kc_lines: sb_set of unsigned values; rp_kcache_tracker::get_lines() takes
// one of these by non-const reference.
38 typedef sb_set
<unsigned> kc_lines
;
// rp_kcache_tracker: offers try_reserve()/unreserve() pairs, once per node and
// once per sel_chan value, plus queries over its rp[] slots.
// NOTE(review): presumably tracks read-port usage for kcache operands within
// one ALU group - confirm against the implementation file.
// NOTE(review): the rp[] array read by num_sels() is not declared in the lines
// visible here.
40 class rp_kcache_tracker
{
// Const member, so it has to be set in the constructor's initializer list.
43 const unsigned sel_count
;
// Converts r to an unsigned key: r itself when sel_count == 4, otherwise
// ((r - 1) >> 1) + 1, which maps r = 1,2 -> 1 and r = 3,4 -> 2 and so on.
// (Assumes sel_chan supports integer arithmetic/conversion - TODO confirm.)
45 unsigned kc_sel(sel_chan r
) {
46 return sel_count
== 4 ? (unsigned)r
: ((r
- 1) >> 1) + 1;
// Constructed from the shader; the definition is not in this view.
50 rp_kcache_tracker(shader
&sh
);
// Node-level reservation. try_reserve() reports its outcome as a bool;
// unreserve() returns nothing.
52 bool try_reserve(node
*n
);
53 void unreserve(node
*n
);
// Same pair of operations for a single sel_chan value.
56 bool try_reserve(sel_chan r
);
57 void unreserve(sel_chan r
);
// Number of non-zero entries among rp[0..3] (each !! yields 0 or 1).
61 unsigned num_sels() { return !!rp
[0] + !!rp
[1] + !!rp
[2] + !!rp
[3]; }
// Takes a kc_lines set by reference and returns an unsigned.
// NOTE(review): presumably fills `lines` and returns a count - confirm.
63 unsigned get_lines(kc_lines
&lines
);
// literal_tracker: offers try_reserve()/unreserve() pairs for whole ALU nodes
// and for individual literal values, plus a count of occupied entries.
// NOTE(review): the lt and uc members used below are not declared in the lines
// visible here.
66 class literal_tracker
{
// Value-initializes both lt and uc.
71 literal_tracker() : lt(), uc() {}
// Node-level reservation; try_reserve() reports its outcome as a bool.
73 bool try_reserve(alu_node
*n
);
74 void unreserve(alu_node
*n
);
// Same pair of operations for a single literal.
76 bool try_reserve(literal l
);
77 void unreserve(literal l
);
// Number of non-zero entries among uc[0..3] (each !! yields 0 or 1).
81 unsigned count() { return !!uc
[0] + !!uc
[1] + !!uc
[2] + !!uc
[3]; }
// Takes an ALU group node; the definition is not in this view.
83 void init_group_literals(alu_group_node
*g
);
// rp_gpr_tracker: offers try_reserve()/unreserve() pairs for whole ALU nodes
// and for an explicit (cycle, sel, chan) triple.
// NOTE(review): presumably tracks GPR read-port usage - confirm. The rp and uc
// members are not declared in the lines visible here.
87 class rp_gpr_tracker
{
// Value-initializes both rp and uc.
93 rp_gpr_tracker() : rp(), uc() {}
// Node-level reservation; try_reserve() reports its outcome as a bool.
95 bool try_reserve(alu_node
*n
);
96 void unreserve(alu_node
*n
);
// Same pair of operations addressed by cycle, sel and chan.
98 bool try_reserve(unsigned cycle
, unsigned sel
, unsigned chan
);
99 void unreserve(unsigned cycle
, unsigned sel
, unsigned chan
);
// alu_group_tracker: per-group state with slot bookkeeping, a kcache tracker,
// literal counting and flags, and operations to reserve, reset, emit and
// discard slots.
// NOTE(review): several members used by the inline methods below (slots,
// max_slots, lt, has_mova) are not declared in the lines visible here.
106 class alu_group_tracker
{
110 rp_kcache_tracker kc
;
// Used as a bitmask: used_slots() inverts it and masks to max_slots bits.
116 unsigned available_slots
;
// Maps a value pointer to an unsigned.
120 typedef std::map
<value
*, unsigned> value_index_map
;
122 value_index_map vmap
;
// Exposed read-only through has_update_exec_mask().
128 bool updates_exec_mask
;
130 unsigned chan_count
[4];
132 // param index + 1 (0 means that group doesn't refer to Params)
133 // we can't use more than one param index in a group
134 unsigned interp_param
;
140 void assign_slot(unsigned slot
, alu_node
*n
);
// Constructed from the shader; the definition is not in this view.
143 alu_group_tracker(shader
&sh
);
145 // FIXME use fast bs correctness check (values for same chan <= 3) ??
// Reservation for a single ALU node or for a packed node; each call reports
// its outcome as a bool.
146 bool try_reserve(alu_node
*n
);
147 bool try_reserve(alu_packed_node
*p
);
// keep_packed defaults to false.
150 void reset(bool keep_packed
= false);
152 sel_chan
get_value_id(value
*v
);
153 void update_flags(alu_node
*n
);
// Returns slots[i] without bounds checking.
155 alu_node
* slot(unsigned i
) { return slots
[i
]; }
// Bitmask of the low max_slots bits that are clear in available_slots.
157 unsigned used_slots() {
158 return (~available_slots
) & ((1 << max_slots
) - 1);
// Number of set bits in used_slots() (uses the GCC/Clang popcount builtin).
161 unsigned inst_count() {
162 return __builtin_popcount(used_slots());
// Literal count as reported by lt.
165 unsigned literal_count() { return lt
.count(); }
// literal_count() divided by two, rounded up.
166 unsigned literal_slot_count() { return (literal_count() + 1) >> 1; };
// Instruction count plus literal slot count.
167 unsigned slot_count() { return inst_count() + literal_slot_count(); }
169 alu_group_node
* emit();
// Mutable access to the embedded kcache tracker.
171 rp_kcache_tracker
& kcache() { return kc
; }
173 bool has_update_exec_mask() { return updates_exec_mask
; }
// Raw available_slots bitmask.
174 unsigned avail_slots() { return available_slots
; }
// Both discard calls take a container_node by reference for the removed nodes.
176 void discard_all_slots(container_node
&removed_nodes
);
177 void discard_slots(unsigned slot_mask
, container_node
&removed_nodes
);
// Returns has_mova.
179 bool has_ar_load() { return has_mova
; }
// alu_kcache_tracker: holds a kc array and a set of unsigned lines, with a
// limit (max_kcs) chosen from the hardware class at construction.
// NOTE(review): the kc and max_kcs members are not declared in the lines
// visible here.
182 class alu_kcache_tracker
{
184 sb_set
<unsigned> lines
;
// kc and lines are value-initialized; max_kcs is 4 when
// hc >= HW_CLASS_EVERGREEN and 2 otherwise.
189 alu_kcache_tracker(sb_hw_class hc
)
190 : kc(), lines(), max_kcs(hc
>= HW_CLASS_EVERGREEN
? 4 : 2) {}
// Takes the group tracker by reference and reports its outcome as a bool.
// The parameter is the reference `&gt`; it had been HTML-entity-decoded into
// a bare `>`, which made this declaration ill-formed.
193 bool try_reserve(alu_group_tracker
&gt
);
// Copies sizeof(kc) bytes from this tracker's kc into bc.kc.
195 void init_clause(bc_cf
&bc
) {
196 memcpy(bc
.kc
, kc
, sizeof(kc
));
// alu_clause_tracker: owns one kcache tracker and two group trackers (grp0,
// grp1), and selects between the two through the `group` member.
// NOTE(review): the group and slot_count members used by the inline methods
// below are not declared in the lines visible here.
200 class alu_clause_tracker
{
203 alu_kcache_tracker kt
;
// The two group trackers that grp() and prev_grp() choose between.
206 alu_group_tracker grp0
;
207 alu_group_tracker grp1
;
216 container_node conflict_nodes
;
218 // current values of AR and PR registers that we have to preload
219 // till the end of clause (in fact, beginning, because we're scheduling
// NOTE(review): the sentence above is cut off in this copy, and the members
// it describes are not visible; it presumably ends with "bottom-up)" -
// confirm against upstream.
223 // current values of CF_IDX registers that need preloading
224 value
*current_idx
[2];
// Constructed from the shader; the definition is not in this view.
226 alu_clause_tracker(shader
&sh
);
// Current group tracker: grp1 when `group` is non-zero, otherwise grp0.
231 alu_group_tracker
& grp() { return group
? grp1
: grp0
; }
// The other group tracker: grp0 when `group` is non-zero, otherwise grp1.
233 alu_group_tracker
& prev_grp() { return group
? grp0
: grp1
; }
236 void emit_clause(container_node
*c
);
237 bool check_clause_limits();
// Returns an alu_node pointer for value v and the given AR channel.
241 alu_node
* create_ar_load(value
*v
, chan_select ar_channel
);
243 void discard_current_group();
// Returns slot_count.
245 unsigned total_slots() { return slot_count
; }
// post_scheduler: a pass (public base `pass`) that keeps ready/pending node
// containers, a live-value set, an alu_clause_tracker and sel_chan -> value
// register maps. Only declarations are visible here; all non-trivial methods
// are defined elsewhere.
// NOTE(review): the cur_bb and ucm members named in the constructor's
// initializer list are not declared in the lines visible here.
248 class post_scheduler
: public pass
{
250 container_node ready
, ready_copies
; // alu only
251 container_node pending
, bb_pending
;
253 val_set live
; // values live at the end of the alu clause
255 alu_clause_tracker alu
;
// Maps a sel_chan to a value pointer.
257 typedef std::map
<sel_chan
, value
*> rv_map
;
258 rv_map regmap
, prev_regmap
;
260 val_set cleared_interf
;
262 void emit_index_registers();
// Forwards sh to the pass base and to alu; the other listed members are
// value-initialized. bb_pending and prev_regmap are not listed and are
// therefore default-initialized.
265 post_scheduler(shader
&sh
) : pass(sh
),
266 ready(), ready_copies(), pending(), cur_bb(),
267 live(), ucm(), alu(sh
), regmap(), cleared_interf() {}
// Entry points taking a container node and a basic-block node.
270 void run_on(container_node
*n
);
271 void schedule_bb(bb_node
*bb
);
273 void load_index_register(value
*v
, unsigned idx
);
274 void process_fetch(container_node
*c
);
// ALU handling for a container; prepare_alu_group() reports its outcome as a
// bool.
276 void process_alu(container_node
*c
);
277 void schedule_alu(container_node
*c
);
278 bool prepare_alu_group();
// release_* helpers at node, value-vector and single-value granularity.
280 void release_op(node
*n
);
282 void release_src_values(node
*n
);
283 void release_src_vec(vvec
&vv
, bool src
);
284 void release_src_val(value
*v
);
// init_uc_* / init_ucm helpers at value, value-vector and node granularity.
// NOTE(review): presumably these populate the ucm use-count map - confirm.
286 void init_uc_val(container_node
*c
, value
*v
);
287 void init_uc_vec(container_node
*c
, vvec
&vv
, bool src
);
288 unsigned init_ucm(container_node
*c
, node
*n
);
292 bool check_interferences();
294 unsigned try_add_instruction(node
*n
);
296 bool check_copy(node
*n
);
297 void dump_group(alu_group_tracker
&rt
);
// unmap_dst* / map_src* helpers; each reports its outcome as a bool.
299 bool unmap_dst(alu_node
*n
);
300 bool unmap_dst_val(value
*d
);
302 bool map_src(alu_node
*n
);
303 bool map_src_vec(vvec
&vv
, bool src
);
304 bool map_src_val(value
*v
);
306 bool recolor_local(value
*v
);
// Liveness/interference updates; `born` is an optional-looking val_set pointer.
// NOTE(review): whether `born` may be null is not visible here - confirm.
308 void update_local_interferences();
309 void update_live_src_vec(vvec
&vv
, val_set
*born
, bool src
);
310 void update_live_dst_vec(vvec
&vv
);
311 void update_live(node
*n
, val_set
*born
);
312 void process_group();
// Assign a sel_chan color to a value.
314 void set_color_local_val(value
*v
, sel_chan color
);
315 void set_color_local(value
*v
, sel_chan color
);
317 void add_interferences(value
*v
, sb_bitset
&rb
, val_set
&vs
);
319 void init_globals(val_set
&s
, bool prealloc
);
321 void recolor_locals();
328 void process_ready_copies();
331 } // namespace r600_sb
333 #endif /* SB_SCHED_H_ */