a74484f50b3346bf0e49cfe01929a968637f3293
[mesa.git] / src / gallium / drivers / r600 / sb / sb_sched.h
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #ifndef SB_SCHED_H_
28 #define SB_SCHED_H_
29
30 namespace r600_sb {
31
32 typedef sb_map<node*, unsigned> uc_map;
33
34 // resource trackers for scheduler
35
36 typedef sb_set<unsigned> kc_lines;
37
38 class rp_kcache_tracker {
39 unsigned rp[4];
40 unsigned uc[4];
41 const unsigned sel_count;
42
43 unsigned kc_sel(sel_chan r) {
44 return sel_count == 4 ? (unsigned)r : ((r - 1) >> 1) + 1;
45 }
46
47 public:
48 rp_kcache_tracker(shader &sh);
49
50 bool try_reserve(node *n);
51 void unreserve(node *n);
52
53
54 bool try_reserve(sel_chan r);
55 void unreserve(sel_chan r);
56
57 void reset();
58
59 unsigned num_sels() { return !!rp[0] + !!rp[1] + !!rp[2] + !!rp[3]; }
60
61 unsigned get_lines(kc_lines &lines);
62 };
63
64 class literal_tracker {
65 literal lt[4];
66 unsigned uc[4];
67 public:
68 literal_tracker() : lt(), uc() {}
69
70 bool try_reserve(alu_node *n);
71 void unreserve(alu_node *n);
72
73 bool try_reserve(literal l);
74 void unreserve(literal l);
75
76 void reset();
77
78 unsigned count() { return !!uc[0] + !!uc[1] + !!uc[2] + !!uc[3]; }
79
80 void init_group_literals(alu_group_node *g);
81
82 };
83
84 class rp_gpr_tracker {
85 // rp[cycle][elem]
86 unsigned rp[3][4];
87 unsigned uc[3][4];
88
89 public:
90 rp_gpr_tracker() : rp(), uc() {}
91
92 bool try_reserve(alu_node *n);
93 void unreserve(alu_node *n);
94
95 bool try_reserve(unsigned cycle, unsigned sel, unsigned chan);
96 void unreserve(unsigned cycle, unsigned sel, unsigned chan);
97
98 void reset();
99
100 void dump();
101 };
102
103 class alu_group_tracker {
104
105 shader &sh;
106
107 rp_kcache_tracker kc;
108 rp_gpr_tracker gpr;
109 literal_tracker lt;
110
111 alu_node * slots[5];
112
113 unsigned available_slots;
114
115 unsigned max_slots;
116
117 typedef std::map<value*, unsigned> value_index_map;
118
119 value_index_map vmap;
120
121 bool has_mova;
122 bool uses_ar;
123 bool has_predset;
124 bool has_kill;
125 bool updates_exec_mask;
126
127 unsigned chan_count[4];
128
129 // param index + 1 (0 means that group doesn't refer to Params)
130 // we can't use more than one param index in a group
131 unsigned interp_param;
132
133 unsigned next_id;
134
135 node_vec packed_ops;
136
137 void assign_slot(unsigned slot, alu_node *n);
138
139 public:
140 alu_group_tracker(shader &sh);
141
142 // FIXME use fast bs correctness check (values for same chan <= 3) ??
143 bool try_reserve(alu_node *n);
144 bool try_reserve(alu_packed_node *p);
145
146 void reinit();
147 void reset(bool keep_packed = false);
148
149 sel_chan get_value_id(value *v);
150 void update_flags(alu_node *n);
151
152 alu_node* slot(unsigned i) { return slots[i]; }
153
154 unsigned used_slots() {
155 return (~available_slots) & ((1 << max_slots) - 1);
156 }
157
158 unsigned inst_count() {
159 return __builtin_popcount(used_slots());
160 }
161
162 unsigned literal_count() { return lt.count(); }
163 unsigned literal_slot_count() { return (literal_count() + 1) >> 1; };
164 unsigned slot_count() { return inst_count() + literal_slot_count(); }
165
166 alu_group_node* emit();
167
168 rp_kcache_tracker& kcache() { return kc; }
169
170 bool has_update_exec_mask() { return updates_exec_mask; }
171 unsigned avail_slots() { return available_slots; }
172
173 void discard_all_slots(container_node &removed_nodes);
174 void discard_slots(unsigned slot_mask, container_node &removed_nodes);
175
176 bool has_ar_load() { return has_mova; }
177 };
178
179 class alu_kcache_tracker {
180 bc_kcache kc[4];
181 sb_set<unsigned> lines;
182 unsigned max_kcs;
183
184 public:
185
186 alu_kcache_tracker(sb_hw_class hc)
187 : kc(), lines(), max_kcs(hc >= HW_CLASS_EVERGREEN ? 4 : 2) {}
188
189 void reset();
190 bool try_reserve(alu_group_tracker &gt);
191 bool update_kc();
192 void init_clause(bc_cf &bc) {
193 memcpy(bc.kc, kc, sizeof(kc));
194 }
195 };
196
197 class alu_clause_tracker {
198 shader &sh;
199
200 alu_kcache_tracker kt;
201 unsigned slot_count;
202
203 alu_group_tracker grp0;
204 alu_group_tracker grp1;
205
206 unsigned group;
207
208 cf_node *clause;
209
210 bool push_exec_mask;
211
212 public:
213 container_node conflict_nodes;
214
215 // current values of AR and PR registers that we have to preload
216 // till the end of clause (in fact, beginning, because we're scheduling
217 // bottom-up)
218 value *current_ar;
219 value *current_pr;
220
221 alu_clause_tracker(shader &sh);
222
223 void reset();
224
225 // current group
226 alu_group_tracker& grp() { return group ? grp1 : grp0; }
227 // previous group
228 alu_group_tracker& prev_grp() { return group ? grp0 : grp1; }
229
230 void emit_group();
231 void emit_clause(container_node *c);
232 bool check_clause_limits();
233 void new_group();
234 bool is_empty();
235
236 alu_node* create_ar_load();
237
238 void discard_current_group();
239
240 unsigned total_slots() { return slot_count; }
241 };
242
243 class post_scheduler : public pass {
244
245 container_node ready, ready_copies; // alu only
246 container_node pending, bb_pending;
247 bb_node *cur_bb;
248 val_set live; // values live at the end of the alu clause
249 uc_map ucm;
250 alu_clause_tracker alu;
251
252 typedef std::map<sel_chan, value*> rv_map;
253 rv_map regmap, prev_regmap;
254
255 val_set cleared_interf;
256
257 public:
258
259 post_scheduler(shader &sh) : pass(sh),
260 ready(), ready_copies(), pending(), cur_bb(),
261 live(), ucm(), alu(sh), regmap(), cleared_interf() {}
262
263 virtual int run();
264 void run_on(container_node *n);
265 void schedule_bb(bb_node *bb);
266
267 void process_alu(container_node *c);
268 void schedule_alu(container_node *c);
269 bool prepare_alu_group();
270
271 void release_op(node *n);
272
273 void release_src_values(node *n);
274 void release_src_vec(vvec &vv, bool src);
275 void release_src_val(value *v);
276
277 void init_uc_val(container_node *c, value *v);
278 void init_uc_vec(container_node *c, vvec &vv, bool src);
279 unsigned init_ucm(container_node *c, node *n);
280
281 void init_regmap();
282
283 bool check_interferences();
284
285 unsigned try_add_instruction(node *n);
286
287 bool check_copy(node *n);
288 void dump_group(alu_group_tracker &rt);
289
290 bool unmap_dst(alu_node *n);
291 bool unmap_dst_val(value *d);
292
293 bool map_src(alu_node *n);
294 bool map_src_vec(vvec &vv, bool src);
295 bool map_src_val(value *v);
296
297 bool recolor_local(value *v);
298
299 void update_local_interferences();
300 void update_live_src_vec(vvec &vv, val_set *born, bool src);
301 void update_live_dst_vec(vvec &vv);
302 void update_live(node *n, val_set *born);
303 void process_group();
304
305 void set_color_local_val(value *v, sel_chan color);
306 void set_color_local(value *v, sel_chan color);
307
308 void add_interferences(value *v, sb_bitset &rb, val_set &vs);
309
310 void init_globals(val_set &s, bool prealloc);
311
312 void recolor_locals();
313
314 void dump_regmap();
315
316 void emit_load_ar();
317 void emit_clause();
318
319 void process_ready_copies();
320 };
321
322 } // namespace r600_sb
323
324 #endif /* SB_SCHED_H_ */