b919fa419d4e2c31cde9aeed00a7ddd1a60eb641
[mesa.git] / src / gallium / drivers / r600 / sb / sb_core.cpp
1 /*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Vadim Girlin
25 */
26
27 #define SB_RA_SCHED_CHECK DEBUG
28
29 extern "C" {
30 #include "os/os_time.h"
31 #include "r600_pipe.h"
32 #include "r600_shader.h"
33
34 #include "sb_public.h"
35 }
36
37 #include <stack>
38 #include <map>
39 #include <iostream>
40
41 #include "sb_bc.h"
42 #include "sb_shader.h"
43 #include "sb_pass.h"
44 #include "sb_sched.h"
45
46 using namespace r600_sb;
47
48 using std::cerr;
49
50 static sb_hw_class translate_chip_class(enum chip_class cc);
51 static sb_hw_chip translate_chip(enum radeon_family rf);
52
53 sb_context *r600_sb_context_create(struct r600_context *rctx) {
54
55 sb_context *sctx = new sb_context();
56
57 if (sctx->init(rctx->isa, translate_chip(rctx->family),
58 translate_chip_class(rctx->chip_class))) {
59 delete sctx;
60 sctx = NULL;
61 }
62
63 unsigned df = rctx->screen->debug_flags;
64
65 sb_context::dump_pass = df & DBG_SB_DUMP;
66 sb_context::dump_stat = df & DBG_SB_STAT;
67 sb_context::dry_run = df & DBG_SB_DRY_RUN;
68 sb_context::no_fallback = df & DBG_SB_NO_FALLBACK;
69
70 sb_context::dskip_start = debug_get_num_option("R600_SB_DSKIP_START", 0);
71 sb_context::dskip_end = debug_get_num_option("R600_SB_DSKIP_END", 0);
72 sb_context::dskip_mode = debug_get_num_option("R600_SB_DSKIP_MODE", 0);
73
74 return sctx;
75 }
76
77 void r600_sb_context_destroy(void * sctx) {
78 if (sctx) {
79 sb_context *ctx = static_cast<sb_context*>(sctx);
80
81 if (sb_context::dump_stat) {
82 cerr << "context src stats: ";
83 ctx->src_stats.dump(cerr);
84 cerr << "context opt stats: ";
85 ctx->opt_stats.dump(cerr);
86 cerr << "context diff: ";
87 ctx->src_stats.dump_diff(cerr, ctx->opt_stats);
88 }
89
90 delete ctx;
91 }
92 }
93
94 int r600_sb_bytecode_process(struct r600_context *rctx,
95 struct r600_bytecode *bc,
96 struct r600_shader *pshader,
97 int dump_source_bytecode,
98 int optimize) {
99 int r = 0;
100 unsigned shader_id = bc->debug_id;
101
102 sb_context *ctx = (sb_context *)rctx->sb_context;
103 if (!ctx) {
104 rctx->sb_context = ctx = r600_sb_context_create(rctx);
105 }
106
107 int64_t time_start = 0;
108 if (sb_context::dump_stat) {
109 time_start = os_time_get_nano();
110 }
111
112 SB_DUMP_STAT( cerr << "\nsb: shader " << shader_id << "\n"; );
113
114 bc_parser parser(*ctx, bc, pshader, dump_source_bytecode, optimize);
115
116 if ((r = parser.parse())) {
117 assert(0);
118 return r;
119 }
120
121 /* skip some shaders (use shaders from default backend)
122 * dskip_start - range start, dskip_end - range_end,
123 * e.g. start = 5, end = 6 means shaders 5 & 6
124 *
125 * dskip_mode == 0 - disabled,
126 * dskip_mode == 1 - don't process the shaders from the [start;end] range
127 * dskip_mode == 2 - process only the shaders from the range
128 */
129 if (sb_context::dskip_mode) {
130 if ((sb_context::dskip_start <= shader_id &&
131 shader_id <= sb_context::dskip_end) ==
132 (sb_context::dskip_mode == 1)) {
133 cerr << "sb: skipped shader " << shader_id << " : " << "["
134 << sb_context::dskip_start << "; "
135 << sb_context::dskip_end << "] mode "
136 << sb_context::dskip_mode << "\n";
137 return 0;
138 }
139 }
140
141 shader *sh = parser.get_shader();
142 SB_DUMP_PASS( cerr << "\n\n###### after parse\n"; sh->dump_ir(); );
143
144 if (!optimize) {
145 delete sh;
146 return 0;
147 }
148
149 #define SB_RUN_PASS(n, dump) \
150 do { \
151 r = n(*sh).run(); \
152 if (r) { \
153 cerr << "sb: error (" << r << ") in the " << #n << " pass.\n"; \
154 if (sb_context::no_fallback) \
155 return r; \
156 cerr << "sb: using unoptimized bytecode...\n"; \
157 delete sh; \
158 return 0; \
159 } \
160 if (dump) { \
161 SB_DUMP_PASS( cerr << "\n\n###### after " << #n << "\n"; \
162 sh->dump_ir();); \
163 } \
164 assert(!r); \
165 } while (0)
166
167 SB_RUN_PASS(ssa_prepare, 0);
168 SB_RUN_PASS(ssa_rename, 1);
169
170 if (sh->has_alu_predication)
171 SB_RUN_PASS(psi_ops, 1);
172
173 SB_RUN_PASS(liveness, 0);
174 SB_RUN_PASS(dce_cleanup, 0);
175 SB_RUN_PASS(def_use, 0);
176
177 sh->set_undef(sh->root->live_before);
178
179 SB_RUN_PASS(peephole, 1);
180 SB_RUN_PASS(if_conversion, 1);
181
182 SB_RUN_PASS(def_use, 0);
183
184 SB_RUN_PASS(gvn, 1);
185
186 SB_RUN_PASS(liveness, 0);
187 SB_RUN_PASS(dce_cleanup, 1);
188 SB_RUN_PASS(def_use, 0);
189
190 SB_RUN_PASS(ra_split, 0);
191 SB_RUN_PASS(def_use, 0);
192
193 // create 'basic blocks'. it's not like we build CFG, they are just
194 // container nodes in the correct locations for code placement
195 sh->create_bbs();
196
197 SB_RUN_PASS(gcm, 1);
198
199 sh->compute_interferences = true;
200 SB_RUN_PASS(liveness, 0);
201
202 SB_RUN_PASS(ra_coalesce, 1);
203 SB_RUN_PASS(ra_init, 1);
204
205 SB_RUN_PASS(post_scheduler, 1);
206
207 sh->expand_bbs();
208
209 #if SB_RA_SCHED_CHECK
210 // check code correctness after regalloc/scheduler
211 SB_RUN_PASS(ra_checker, 0);
212 #endif
213
214 SB_RUN_PASS(bc_finalizer, 0);
215
216 sh->optimized = true;
217
218 bc_builder builder(*sh);
219
220 if ((r = builder.build())) {
221 assert(0);
222 return r;
223 }
224
225 if (!sb_context::dry_run) {
226 bytecode &nbc = builder.get_bytecode();
227
228 free(bc->bytecode);
229 bc->ndw = nbc.ndw();
230 bc->bytecode = (uint32_t*) malloc(bc->ndw << 2);
231 nbc.write_data(bc->bytecode);
232
233 bc->ngpr = sh->ngpr;
234 bc->nstack = sh->nstack;
235 } else {
236 SB_DUMP_STAT( cerr << "SB_USE_NEW_BYTECODE is not enabled\n"; );
237 }
238
239
240 if (sb_context::dump_stat) {
241 int64_t t = os_time_get_nano() - time_start;
242
243 cerr << "sb: processing shader " << shader_id << " done ( "
244 << ((double)t)/1000000.0 << " ms ).\n";
245
246 sh->opt_stats.ndw = bc->ndw;
247 sh->collect_stats(true);
248
249 cerr << "src stats: ";
250 sh->src_stats.dump(cerr);
251 cerr << "opt stats: ";
252 sh->opt_stats.dump(cerr);
253 cerr << "diff: ";
254 sh->src_stats.dump_diff(cerr, sh->opt_stats);
255 }
256
257 delete sh;
258 return 0;
259 }
260
261 static sb_hw_chip translate_chip(enum radeon_family rf) {
262 switch (rf) {
263
264 #define TRANSLATE_CHIP(c) case CHIP_##c: return HW_CHIP_##c
265 TRANSLATE_CHIP(R600);
266 TRANSLATE_CHIP(RV610);
267 TRANSLATE_CHIP(RV630);
268 TRANSLATE_CHIP(RV670);
269 TRANSLATE_CHIP(RV620);
270 TRANSLATE_CHIP(RV635);
271 TRANSLATE_CHIP(RS780);
272 TRANSLATE_CHIP(RS880);
273 TRANSLATE_CHIP(RV770);
274 TRANSLATE_CHIP(RV730);
275 TRANSLATE_CHIP(RV710);
276 TRANSLATE_CHIP(RV740);
277 TRANSLATE_CHIP(CEDAR);
278 TRANSLATE_CHIP(REDWOOD);
279 TRANSLATE_CHIP(JUNIPER);
280 TRANSLATE_CHIP(CYPRESS);
281 TRANSLATE_CHIP(HEMLOCK);
282 TRANSLATE_CHIP(PALM);
283 TRANSLATE_CHIP(SUMO);
284 TRANSLATE_CHIP(SUMO2);
285 TRANSLATE_CHIP(BARTS);
286 TRANSLATE_CHIP(TURKS);
287 TRANSLATE_CHIP(CAICOS);
288 TRANSLATE_CHIP(CAYMAN);
289 #undef TRANSLATE_CHIP
290
291 default:
292 assert(!"unknown chip");
293 return HW_CHIP_UNKNOWN;
294 }
295 }
296
297 static sb_hw_class translate_chip_class(enum chip_class cc) {
298 switch(cc) {
299 case R600: return HW_CLASS_R600;
300 case R700: return HW_CLASS_R700;
301 case EVERGREEN: return HW_CLASS_EVERGREEN;
302 case CAYMAN: return HW_CLASS_CAYMAN;
303
304 default:
305 assert(!"unknown chip class");
306 return HW_CLASS_UNKNOWN;
307 }
308 }