r600g: move streamout state to drivers/radeon
[mesa.git] / src / gallium / drivers / r600 / sb / sb_core.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define SB_RA_SCHED_CHECK DEBUG
28
29 extern "C" {
30 #include "os/os_time.h"
31 #include "r600_pipe.h"
32 #include "r600_shader.h"
33
34 #include "sb_public.h"
35 }
36
37 #include <stack>
38 #include <map>
39
40 #include "sb_bc.h"
41 #include "sb_shader.h"
42 #include "sb_pass.h"
43 #include "sb_sched.h"
44
45 using namespace r600_sb;
46
47 static sb_hw_class translate_chip_class(enum chip_class cc);
48 static sb_hw_chip translate_chip(enum radeon_family rf);
49
50 sb_context *r600_sb_context_create(struct r600_context *rctx) {
51
52 sb_context *sctx = new sb_context();
53
54 if (sctx->init(rctx->isa, translate_chip(rctx->b.family),
55 translate_chip_class(rctx->b.chip_class))) {
56 delete sctx;
57 sctx = NULL;
58 }
59
60 unsigned df = rctx->screen->debug_flags;
61
62 sb_context::dump_pass = df & DBG_SB_DUMP;
63 sb_context::dump_stat = df & DBG_SB_STAT;
64 sb_context::dry_run = df & DBG_SB_DRY_RUN;
65 sb_context::no_fallback = df & DBG_SB_NO_FALLBACK;
66 sb_context::safe_math = df & DBG_SB_SAFEMATH;
67
68 sb_context::dskip_start = debug_get_num_option("R600_SB_DSKIP_START", 0);
69 sb_context::dskip_end = debug_get_num_option("R600_SB_DSKIP_END", 0);
70 sb_context::dskip_mode = debug_get_num_option("R600_SB_DSKIP_MODE", 0);
71
72 return sctx;
73 }
74
75 void r600_sb_context_destroy(void * sctx) {
76 if (sctx) {
77 sb_context *ctx = static_cast<sb_context*>(sctx);
78
79 if (sb_context::dump_stat) {
80 sblog << "\ncontext src stats: ";
81 ctx->src_stats.dump();
82 sblog << "context opt stats: ";
83 ctx->opt_stats.dump();
84 sblog << "context diff: ";
85 ctx->src_stats.dump_diff(ctx->opt_stats);
86 }
87
88 delete ctx;
89 }
90 }
91
92 int r600_sb_bytecode_process(struct r600_context *rctx,
93 struct r600_bytecode *bc,
94 struct r600_shader *pshader,
95 int dump_bytecode,
96 int optimize) {
97 int r = 0;
98 unsigned shader_id = bc->debug_id;
99
100 sb_context *ctx = (sb_context *)rctx->sb_context;
101 if (!ctx) {
102 rctx->sb_context = ctx = r600_sb_context_create(rctx);
103 }
104
105 int64_t time_start = 0;
106 if (sb_context::dump_stat) {
107 time_start = os_time_get_nano();
108 }
109
110 SB_DUMP_STAT( sblog << "\nsb: shader " << shader_id << "\n"; );
111
112 bc_parser parser(*ctx, bc, pshader);
113
114 if ((r = parser.decode())) {
115 assert(!"sb: bytecode decoding error");
116 return r;
117 }
118
119 shader *sh = parser.get_shader();
120
121 if (dump_bytecode) {
122 bc_dump(*sh, bc->bytecode, bc->ndw).run();
123 }
124
125 if (!optimize) {
126 delete sh;
127 return 0;
128 }
129
130 if (sh->target != TARGET_FETCH) {
131 sh->src_stats.ndw = bc->ndw;
132 sh->collect_stats(false);
133 }
134
135 /* skip some shaders (use shaders from default backend)
136 * dskip_start - range start, dskip_end - range_end,
137 * e.g. start = 5, end = 6 means shaders 5 & 6
138 *
139 * dskip_mode == 0 - disabled,
140 * dskip_mode == 1 - don't process the shaders from the [start;end] range
141 * dskip_mode == 2 - process only the shaders from the range
142 */
143 if (sb_context::dskip_mode) {
144 if ((sb_context::dskip_start <= shader_id &&
145 shader_id <= sb_context::dskip_end) ==
146 (sb_context::dskip_mode == 1)) {
147 sblog << "sb: skipped shader " << shader_id << " : " << "["
148 << sb_context::dskip_start << "; "
149 << sb_context::dskip_end << "] mode "
150 << sb_context::dskip_mode << "\n";
151 return 0;
152 }
153 }
154
155 if ((r = parser.prepare())) {
156 assert(!"sb: bytecode parsing error");
157 return r;
158 }
159
160 SB_DUMP_PASS( sblog << "\n\n###### after parse\n"; sh->dump_ir(); );
161
162 #define SB_RUN_PASS(n, dump) \
163 do { \
164 r = n(*sh).run(); \
165 if (r) { \
166 sblog << "sb: error (" << r << ") in the " << #n << " pass.\n"; \
167 if (sb_context::no_fallback) \
168 return r; \
169 sblog << "sb: using unoptimized bytecode...\n"; \
170 delete sh; \
171 return 0; \
172 } \
173 if (dump) { \
174 SB_DUMP_PASS( sblog << "\n\n###### after " << #n << "\n"; \
175 sh->dump_ir();); \
176 } \
177 assert(!r); \
178 } while (0)
179
180 SB_RUN_PASS(ssa_prepare, 0);
181 SB_RUN_PASS(ssa_rename, 1);
182
183 if (sh->has_alu_predication)
184 SB_RUN_PASS(psi_ops, 1);
185
186 SB_RUN_PASS(liveness, 0);
187 SB_RUN_PASS(dce_cleanup, 0);
188 SB_RUN_PASS(def_use, 0);
189
190 sh->set_undef(sh->root->live_before);
191
192 SB_RUN_PASS(if_conversion, 1);
193
194 // if_conversion breaks info about uses, but next pass (peephole)
195 // doesn't need it, so we can skip def/use update here
196 // until it's really required
197 //SB_RUN_PASS(def_use, 0);
198
199 SB_RUN_PASS(peephole, 1);
200 SB_RUN_PASS(def_use, 0);
201
202 SB_RUN_PASS(gvn, 1);
203
204 SB_RUN_PASS(liveness, 0);
205 SB_RUN_PASS(dce_cleanup, 1);
206 SB_RUN_PASS(def_use, 0);
207
208 SB_RUN_PASS(ra_split, 0);
209 SB_RUN_PASS(def_use, 0);
210
211 // create 'basic blocks'. it's not like we build CFG, they are just
212 // container nodes in the correct locations for code placement
213 sh->create_bbs();
214
215 SB_RUN_PASS(gcm, 1);
216
217 sh->compute_interferences = true;
218 SB_RUN_PASS(liveness, 0);
219
220 SB_RUN_PASS(ra_coalesce, 1);
221 SB_RUN_PASS(ra_init, 1);
222
223 SB_RUN_PASS(post_scheduler, 1);
224
225 sh->expand_bbs();
226
227 #if SB_RA_SCHED_CHECK
228 // check code correctness after regalloc/scheduler
229 SB_RUN_PASS(ra_checker, 0);
230 #endif
231
232 SB_RUN_PASS(bc_finalizer, 0);
233
234 sh->optimized = true;
235
236 bc_builder builder(*sh);
237
238 if ((r = builder.build())) {
239 assert(0);
240 return r;
241 }
242
243 bytecode &nbc = builder.get_bytecode();
244
245 if (dump_bytecode) {
246 bc_dump(*sh, &nbc).run();
247 }
248
249 if (!sb_context::dry_run) {
250
251 free(bc->bytecode);
252 bc->ndw = nbc.ndw();
253 bc->bytecode = (uint32_t*) malloc(bc->ndw << 2);
254 nbc.write_data(bc->bytecode);
255
256 bc->ngpr = sh->ngpr;
257 bc->nstack = sh->nstack;
258 } else {
259 SB_DUMP_STAT( sblog << "sb: dry run: optimized bytecode is not used\n"; );
260 }
261
262 if (sb_context::dump_stat) {
263 int64_t t = os_time_get_nano() - time_start;
264
265 sblog << "sb: processing shader " << shader_id << " done ( "
266 << ((double)t)/1000000.0 << " ms ).\n";
267
268 sh->opt_stats.ndw = bc->ndw;
269 sh->collect_stats(true);
270
271 sblog << "src stats: ";
272 sh->src_stats.dump();
273 sblog << "opt stats: ";
274 sh->opt_stats.dump();
275 sblog << "diff: ";
276 sh->src_stats.dump_diff(sh->opt_stats);
277 }
278
279 delete sh;
280 return 0;
281 }
282
283 static sb_hw_chip translate_chip(enum radeon_family rf) {
284 switch (rf) {
285
286 #define TRANSLATE_CHIP(c) case CHIP_##c: return HW_CHIP_##c
287 TRANSLATE_CHIP(R600);
288 TRANSLATE_CHIP(RV610);
289 TRANSLATE_CHIP(RV630);
290 TRANSLATE_CHIP(RV670);
291 TRANSLATE_CHIP(RV620);
292 TRANSLATE_CHIP(RV635);
293 TRANSLATE_CHIP(RS780);
294 TRANSLATE_CHIP(RS880);
295 TRANSLATE_CHIP(RV770);
296 TRANSLATE_CHIP(RV730);
297 TRANSLATE_CHIP(RV710);
298 TRANSLATE_CHIP(RV740);
299 TRANSLATE_CHIP(CEDAR);
300 TRANSLATE_CHIP(REDWOOD);
301 TRANSLATE_CHIP(JUNIPER);
302 TRANSLATE_CHIP(CYPRESS);
303 TRANSLATE_CHIP(HEMLOCK);
304 TRANSLATE_CHIP(PALM);
305 TRANSLATE_CHIP(SUMO);
306 TRANSLATE_CHIP(SUMO2);
307 TRANSLATE_CHIP(BARTS);
308 TRANSLATE_CHIP(TURKS);
309 TRANSLATE_CHIP(CAICOS);
310 TRANSLATE_CHIP(CAYMAN);
311 TRANSLATE_CHIP(ARUBA);
312 #undef TRANSLATE_CHIP
313
314 default:
315 assert(!"unknown chip");
316 return HW_CHIP_UNKNOWN;
317 }
318 }
319
320 static sb_hw_class translate_chip_class(enum chip_class cc) {
321 switch(cc) {
322 case R600: return HW_CLASS_R600;
323 case R700: return HW_CLASS_R700;
324 case EVERGREEN: return HW_CLASS_EVERGREEN;
325 case CAYMAN: return HW_CLASS_CAYMAN;
326
327 default:
328 assert(!"unknown chip class");
329 return HW_CLASS_UNKNOWN;
330 }
331 }