/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
27 #define SB_RA_SCHED_CHECK DEBUG
29 #include "os/os_time.h"
30 #include "r600_pipe.h"
31 #include "r600_shader.h"
33 #include "sb_public.h"
39 #include "sb_shader.h"
/* All of the backend implementation lives in the r600_sb namespace. */
using namespace r600_sb;

/* Forward declarations: translate the gallium/r600 chip enums into the
 * backend's own hardware identifiers (definitions at the end of the file). */
static sb_hw_class translate_chip_class(enum chip_class cc);
static sb_hw_chip translate_chip(enum radeon_family rf);
48 sb_context
*r600_sb_context_create(struct r600_context
*rctx
) {
50 sb_context
*sctx
= new sb_context();
52 if (sctx
->init(rctx
->isa
, translate_chip(rctx
->b
.family
),
53 translate_chip_class(rctx
->b
.chip_class
))) {
58 unsigned df
= rctx
->screen
->b
.debug_flags
;
60 sb_context::dump_pass
= df
& DBG_SB_DUMP
;
61 sb_context::dump_stat
= df
& DBG_SB_STAT
;
62 sb_context::dry_run
= df
& DBG_SB_DRY_RUN
;
63 sb_context::no_fallback
= df
& DBG_SB_NO_FALLBACK
;
64 sb_context::safe_math
= df
& DBG_SB_SAFEMATH
;
66 sb_context::dskip_start
= debug_get_num_option("R600_SB_DSKIP_START", 0);
67 sb_context::dskip_end
= debug_get_num_option("R600_SB_DSKIP_END", 0);
68 sb_context::dskip_mode
= debug_get_num_option("R600_SB_DSKIP_MODE", 0);
73 void r600_sb_context_destroy(void * sctx
) {
75 sb_context
*ctx
= static_cast<sb_context
*>(sctx
);
77 if (sb_context::dump_stat
) {
78 sblog
<< "\ncontext src stats: ";
79 ctx
->src_stats
.dump();
80 sblog
<< "context opt stats: ";
81 ctx
->opt_stats
.dump();
82 sblog
<< "context diff: ";
83 ctx
->src_stats
.dump_diff(ctx
->opt_stats
);
/* Run the sb optimizing backend over the r600 bytecode in 'bc'.
 *
 * NOTE(review): this block is an incomplete extraction — the tail of the
 * parameter list, the declaration of the error code 'r', several
 * error-handling/cleanup lines and part of the SB_RUN_PASS macro body are
 * missing from this view. The visible code is preserved unchanged; the gaps
 * are marked with NOTE(review) comments below.
 */
int r600_sb_bytecode_process(struct r600_context *rctx,
		struct r600_bytecode *bc,
		struct r600_shader *pshader,
		/* NOTE(review): remaining parameters (and the opening of the body,
		 * including the declaration of 'r') are missing here */

	/* per-shader debug id, used in log messages and the dskip range check */
	unsigned shader_id = bc->debug_id;

	/* backend context is created lazily on first use and cached on rctx;
	 * NOTE(review): presumably guarded by a missing 'if (!ctx)' — confirm */
	sb_context *ctx = (sb_context *)rctx->sb_context;
	rctx->sb_context = ctx = r600_sb_context_create(rctx);

	/* wall-clock start for the per-shader timing printed under dump_stat */
	int64_t time_start = 0;
	if (sb_context::dump_stat) {
		time_start = os_time_get_nano();
	/* NOTE(review): closing brace of the dump_stat block missing here */

	SB_DUMP_STAT( sblog << "\nsb: shader " << shader_id << "\n"; );

	/* decode the input r600 bytecode into the backend IR */
	bc_parser parser(*ctx, bc, pshader);

	if ((r = parser.decode())) {
		assert(!"sb: bytecode decoding error");
	/* NOTE(review): decode-error cleanup/return missing here */

	shader *sh = parser.get_shader();

	/* dump of the unoptimized input bytecode */
	bc_dump(*sh, bc->bytecode, bc->ndw).run();

	/* fetch shaders are excluded from the source statistics */
	if (sh->target != TARGET_FETCH) {
		sh->src_stats.ndw = bc->ndw;
		sh->collect_stats(false);
	/* NOTE(review): closing brace missing here */

	/* skip some shaders (use shaders from default backend)
	 * dskip_start - range start, dskip_end - range_end,
	 * e.g. start = 5, end = 6 means shaders 5 & 6
	 *
	 * dskip_mode == 0 - disabled,
	 * dskip_mode == 1 - don't process the shaders from the [start;end] range
	 * dskip_mode == 2 - process only the shaders from the range
	 */
	if (sb_context::dskip_mode) {
		if ((sb_context::dskip_start <= shader_id &&
				shader_id <= sb_context::dskip_end) ==
						(sb_context::dskip_mode == 1)) {
			sblog << "sb: skipped shader " << shader_id << " : " << "["
					<< sb_context::dskip_start << "; "
					<< sb_context::dskip_end << "] mode "
					<< sb_context::dskip_mode << "\n";
	/* NOTE(review): skip-path cleanup/return and closing braces missing */

	/* build the IR structures (CFG, values) from the decoded instructions */
	if ((r = parser.prepare())) {
		assert(!"sb: bytecode parsing error");
	/* NOTE(review): prepare-error cleanup/return missing here */

	SB_DUMP_PASS( sblog << "\n\n###### after parse\n"; sh->dump_ir(); );

/* Run pass 'n' on the shader; on failure either propagate the error
 * (no_fallback) or fall back to the unoptimized bytecode, and optionally
 * dump the IR after the pass.
 * NOTE(review): parts of this macro body are missing from this extraction. */
#define SB_RUN_PASS(n, dump) \
		sblog << "sb: error (" << r << ") in the " << #n << " pass.\n"; \
		if (sb_context::no_fallback) \
		sblog << "sb: using unoptimized bytecode...\n"; \
		SB_DUMP_PASS( sblog << "\n\n###### after " << #n << "\n"; \

	/* --- optimization pipeline --- */
	SB_RUN_PASS(ssa_prepare, 0);
	SB_RUN_PASS(ssa_rename, 1);

	if (sh->has_alu_predication)
		SB_RUN_PASS(psi_ops, 1);

	SB_RUN_PASS(liveness, 0);

	sh->dce_flags = DF_REMOVE_DEAD | DF_EXPAND;
	SB_RUN_PASS(dce_cleanup, 0);
	SB_RUN_PASS(def_use, 0);

	/* mark values live at shader entry as undefined */
	sh->set_undef(sh->root->live_before);

	// if conversion breaks the dependency tracking between CF_EMIT ops when it removes
	// the phi nodes for SV_GEOMETRY_EMIT. Just disable it for GS
	if (sh->target != TARGET_GS)
		SB_RUN_PASS(if_conversion, 1);

	// if_conversion breaks info about uses, but next pass (peephole)
	// doesn't need it, so we can skip def/use update here
	// until it's really required
	//SB_RUN_PASS(def_use, 0);

	SB_RUN_PASS(peephole, 1);
	SB_RUN_PASS(def_use, 0);

	SB_RUN_PASS(def_use, 1);

	sh->dce_flags = DF_REMOVE_DEAD | DF_REMOVE_UNUSED;
	SB_RUN_PASS(dce_cleanup, 1);

	SB_RUN_PASS(ra_split, 0);
	SB_RUN_PASS(def_use, 0);

	// create 'basic blocks'. it's not like we build CFG, they are just
	// container nodes in the correct locations for code placement
	/* NOTE(review): the pass invocations for this step are missing here */

	/* liveness is rerun with interference computation for regalloc */
	sh->compute_interferences = true;
	SB_RUN_PASS(liveness, 0);

	sh->dce_flags = DF_REMOVE_DEAD;
	SB_RUN_PASS(dce_cleanup, 1);

	/* register allocation */
	SB_RUN_PASS(ra_coalesce, 1);
	SB_RUN_PASS(ra_init, 1);

	SB_RUN_PASS(post_scheduler, 1);

#if SB_RA_SCHED_CHECK
	// check code correctness after regalloc/scheduler
	SB_RUN_PASS(ra_checker, 0);
	/* NOTE(review): matching #endif is missing from this extraction */

	/* translate the optimized IR back into r600 bytecode */
	SB_RUN_PASS(bc_finalizer, 0);

	sh->optimized = true;

	bc_builder builder(*sh);

	if ((r = builder.build())) {
	/* NOTE(review): build-error handling missing here */

	bytecode &nbc = builder.get_bytecode();

	/* dump of the optimized bytecode */
	bc_dump(*sh, &nbc).run();

	/* with dry_run the optimized bytecode is computed but thrown away */
	if (!sb_context::dry_run) {
		/* NOTE(review): the free of the old bc->bytecode and the update of
		 * bc->ndw appear to be missing here */
		bc->bytecode = (uint32_t*) malloc(bc->ndw << 2);
		nbc.write_data(bc->bytecode);

		bc->nstack = sh->nstack;
	/* NOTE(review): an '} else {' opener appears to be missing here */
		SB_DUMP_STAT( sblog << "sb: dry run: optimized bytecode is not used\n"; );

	if (sb_context::dump_stat) {
		/* elapsed wall-clock time for this shader, printed in milliseconds */
		int64_t t = os_time_get_nano() - time_start;

		sblog << "sb: processing shader " << shader_id << " done ( "
				<< ((double)t)/1000000.0 << " ms ).\n";

		sh->opt_stats.ndw = bc->ndw;
		sh->collect_stats(true);

		sblog << "src stats: ";
		sh->src_stats.dump();
		sblog << "opt stats: ";
		sh->opt_stats.dump();

		sh->src_stats.dump_diff(sh->opt_stats);
	/* NOTE(review): function epilogue (cleanup, return, closing braces)
	 * missing from this extraction */
290 static sb_hw_chip
translate_chip(enum radeon_family rf
) {
293 #define TRANSLATE_CHIP(c) case CHIP_##c: return HW_CHIP_##c
294 TRANSLATE_CHIP(R600
);
295 TRANSLATE_CHIP(RV610
);
296 TRANSLATE_CHIP(RV630
);
297 TRANSLATE_CHIP(RV670
);
298 TRANSLATE_CHIP(RV620
);
299 TRANSLATE_CHIP(RV635
);
300 TRANSLATE_CHIP(RS780
);
301 TRANSLATE_CHIP(RS880
);
302 TRANSLATE_CHIP(RV770
);
303 TRANSLATE_CHIP(RV730
);
304 TRANSLATE_CHIP(RV710
);
305 TRANSLATE_CHIP(RV740
);
306 TRANSLATE_CHIP(CEDAR
);
307 TRANSLATE_CHIP(REDWOOD
);
308 TRANSLATE_CHIP(JUNIPER
);
309 TRANSLATE_CHIP(CYPRESS
);
310 TRANSLATE_CHIP(HEMLOCK
);
311 TRANSLATE_CHIP(PALM
);
312 TRANSLATE_CHIP(SUMO
);
313 TRANSLATE_CHIP(SUMO2
);
314 TRANSLATE_CHIP(BARTS
);
315 TRANSLATE_CHIP(TURKS
);
316 TRANSLATE_CHIP(CAICOS
);
317 TRANSLATE_CHIP(CAYMAN
);
318 TRANSLATE_CHIP(ARUBA
);
319 #undef TRANSLATE_CHIP
322 assert(!"unknown chip");
323 return HW_CHIP_UNKNOWN
;
327 static sb_hw_class
translate_chip_class(enum chip_class cc
) {
329 case R600
: return HW_CLASS_R600
;
330 case R700
: return HW_CLASS_R700
;
331 case EVERGREEN
: return HW_CLASS_EVERGREEN
;
332 case CAYMAN
: return HW_CLASS_CAYMAN
;
335 assert(!"unknown chip class");
336 return HW_CLASS_UNKNOWN
;