gallium: remove PIPE_CAP_USER_CONSTANT_BUFFERS
[mesa.git] / src / gallium / drivers / r600 / sb / sb_sched.h
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #ifndef SB_SCHED_H_
28 #define SB_SCHED_H_
29
30 namespace r600_sb {
31
32 typedef sb_map<node*, unsigned> uc_map;
33
34 // resource trackers for scheduler
35 // rp = read port
36 // uc = use count
37
38 typedef sb_set<unsigned> kc_lines;
39
40 class rp_kcache_tracker {
41 unsigned rp[4];
42 unsigned uc[4];
43 const unsigned sel_count;
44
45 unsigned kc_sel(sel_chan r) {
46 return sel_count == 4 ? (unsigned)r : ((r - 1) >> 1) + 1;
47 }
48
49 public:
50 rp_kcache_tracker(shader &sh);
51
52 bool try_reserve(node *n);
53 void unreserve(node *n);
54
55
56 bool try_reserve(sel_chan r);
57 void unreserve(sel_chan r);
58
59 void reset();
60
61 unsigned num_sels() { return !!rp[0] + !!rp[1] + !!rp[2] + !!rp[3]; }
62
63 unsigned get_lines(kc_lines &lines);
64 };
65
66 class literal_tracker {
67 literal lt[4];
68 unsigned uc[4];
69
70 public:
71 literal_tracker() : lt(), uc() {}
72
73 bool try_reserve(alu_node *n);
74 void unreserve(alu_node *n);
75
76 bool try_reserve(literal l);
77 void unreserve(literal l);
78
79 void reset();
80
81 unsigned count() { return !!uc[0] + !!uc[1] + !!uc[2] + !!uc[3]; }
82
83 void init_group_literals(alu_group_node *g);
84
85 };
86
87 class rp_gpr_tracker {
88 // rp[cycle][elem]
89 unsigned rp[3][4];
90 unsigned uc[3][4];
91
92 public:
93 rp_gpr_tracker() : rp(), uc() {}
94
95 bool try_reserve(alu_node *n);
96 void unreserve(alu_node *n);
97
98 bool try_reserve(unsigned cycle, unsigned sel, unsigned chan);
99 void unreserve(unsigned cycle, unsigned sel, unsigned chan);
100
101 void reset();
102
103 void dump();
104 };
105
106 class alu_group_tracker {
107
108 shader &sh;
109
110 rp_kcache_tracker kc;
111 rp_gpr_tracker gpr;
112 literal_tracker lt;
113
114 alu_node * slots[5];
115
116 unsigned available_slots;
117
118 unsigned max_slots;
119
120 typedef std::map<value*, unsigned> value_index_map;
121
122 value_index_map vmap;
123
124 bool has_mova;
125 bool uses_ar;
126 bool has_predset;
127 bool has_kill;
128 bool updates_exec_mask;
129
130 unsigned chan_count[4];
131
132 // param index + 1 (0 means that group doesn't refer to Params)
133 // we can't use more than one param index in a group
134 unsigned interp_param;
135
136 unsigned next_id;
137
138 node_vec packed_ops;
139
140 void assign_slot(unsigned slot, alu_node *n);
141
142 public:
143 alu_group_tracker(shader &sh);
144
145 // FIXME use fast bs correctness check (values for same chan <= 3) ??
146 bool try_reserve(alu_node *n);
147 bool try_reserve(alu_packed_node *p);
148
149 void reinit();
150 void reset(bool keep_packed = false);
151
152 sel_chan get_value_id(value *v);
153 void update_flags(alu_node *n);
154
155 alu_node* slot(unsigned i) { return slots[i]; }
156
157 unsigned used_slots() {
158 return (~available_slots) & ((1 << max_slots) - 1);
159 }
160
161 unsigned inst_count() {
162 return __builtin_popcount(used_slots());
163 }
164
165 unsigned literal_count() { return lt.count(); }
166 unsigned literal_slot_count() { return (literal_count() + 1) >> 1; };
167 unsigned slot_count() { return inst_count() + literal_slot_count(); }
168
169 alu_group_node* emit();
170
171 rp_kcache_tracker& kcache() { return kc; }
172
173 bool has_update_exec_mask() { return updates_exec_mask; }
174 unsigned avail_slots() { return available_slots; }
175
176 void discard_all_slots(container_node &removed_nodes);
177 void discard_slots(unsigned slot_mask, container_node &removed_nodes);
178
179 bool has_ar_load() { return has_mova; }
180 };
181
182 class alu_kcache_tracker {
183 bc_kcache kc[4];
184 sb_set<unsigned> lines;
185 unsigned max_kcs;
186
187 public:
188
189 alu_kcache_tracker(sb_hw_class hc)
190 : kc(), lines(), max_kcs(hc >= HW_CLASS_EVERGREEN ? 4 : 2) {}
191
192 void reset();
193 bool try_reserve(alu_group_tracker &gt);
194 bool update_kc();
195 void init_clause(bc_cf &bc) {
196 memcpy(bc.kc, kc, sizeof(kc));
197 }
198 };
199
200 class alu_clause_tracker {
201 shader &sh;
202
203 alu_kcache_tracker kt;
204 unsigned slot_count;
205
206 alu_group_tracker grp0;
207 alu_group_tracker grp1;
208
209 unsigned group;
210
211 cf_node *clause;
212
213 bool push_exec_mask;
214
215 public:
216 container_node conflict_nodes;
217
218 // current values of AR and PR registers that we have to preload
219 // till the end of clause (in fact, beginning, because we're scheduling
220 // bottom-up)
221 value *current_ar;
222 value *current_pr;
223 // current values of CF_IDX registers that need preloading
224 value *current_idx[2];
225
226 alu_clause_tracker(shader &sh);
227
228 void reset();
229
230 // current group
231 alu_group_tracker& grp() { return group ? grp1 : grp0; }
232 // previous group
233 alu_group_tracker& prev_grp() { return group ? grp0 : grp1; }
234
235 void emit_group();
236 void emit_clause(container_node *c);
237 bool check_clause_limits();
238 void new_group();
239 bool is_empty();
240
241 alu_node* create_ar_load(value *v, chan_select ar_channel);
242
243 void discard_current_group();
244
245 unsigned total_slots() { return slot_count; }
246 };
247
248 class post_scheduler : public pass {
249
250 container_node ready, ready_copies; // alu only
251 container_node pending, bb_pending;
252 bb_node *cur_bb;
253 val_set live; // values live at the end of the alu clause
254 uc_map ucm;
255 alu_clause_tracker alu;
256
257 typedef std::map<sel_chan, value*> rv_map;
258 rv_map regmap, prev_regmap;
259
260 val_set cleared_interf;
261
262 void emit_index_registers();
263 public:
264
265 post_scheduler(shader &sh) : pass(sh),
266 ready(), ready_copies(), pending(), cur_bb(),
267 live(), ucm(), alu(sh), regmap(), cleared_interf() {}
268
269 virtual int run();
270 bool run_on(container_node *n);
271 bool schedule_bb(bb_node *bb);
272
273 void load_index_register(value *v, unsigned idx);
274 void process_fetch(container_node *c);
275
276 bool process_alu(container_node *c);
277 bool schedule_alu(container_node *c);
278 bool prepare_alu_group();
279
280 void release_op(node *n);
281
282 void release_src_values(node *n);
283 void release_src_vec(vvec &vv, bool src);
284 void release_src_val(value *v);
285
286 void init_uc_val(container_node *c, value *v);
287 void init_uc_vec(container_node *c, vvec &vv, bool src);
288 unsigned init_ucm(container_node *c, node *n);
289
290 void init_regmap();
291
292 bool check_interferences();
293
294 unsigned try_add_instruction(node *n);
295
296 bool check_copy(node *n);
297 void dump_group(alu_group_tracker &rt);
298
299 bool unmap_dst(alu_node *n);
300 bool unmap_dst_val(value *d);
301
302 bool map_src(alu_node *n);
303 bool map_src_vec(vvec &vv, bool src);
304 bool map_src_val(value *v);
305
306 bool recolor_local(value *v);
307
308 void update_local_interferences();
309 void update_live_src_vec(vvec &vv, val_set *born, bool src);
310 void update_live_dst_vec(vvec &vv);
311 void update_live(node *n, val_set *born);
312 void process_group();
313
314 void set_color_local_val(value *v, sel_chan color);
315 void set_color_local(value *v, sel_chan color);
316
317 void add_interferences(value *v, sb_bitset &rb, val_set &vs);
318
319 void init_globals(val_set &s, bool prealloc);
320
321 void recolor_locals();
322
323 void dump_regmap();
324
325 void emit_load_ar();
326 void emit_clause();
327
328 void process_ready_copies();
329 };
330
331 } // namespace r600_sb
332
333 #endif /* SB_SCHED_H_ */