2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
// Compile-time switch tied to DEBUG. It is tested by the
// `#if SB_RA_SCHED_CHECK` section of r600_sb_bytecode_process, which runs the
// ra_checker pass after register allocation / scheduling.
27 #define SB_RA_SCHED_CHECK DEBUG
30 #include "os/os_time.h"
31 #include "r600_pipe.h"
32 #include "r600_shader.h"
34 #include "sb_public.h"
41 #include "sb_shader.h"
// Everything in this file refers to r600_sb names unqualified.
45 using namespace r600_sb
;
// Forward declarations of the two file-local (static) translation helpers
// that are defined at the end of this file and used by
// r600_sb_context_create:
//   translate_chip_class: enum chip_class    -> sb_hw_class
//   translate_chip:       enum radeon_family -> sb_hw_chip
47 static sb_hw_class
translate_chip_class(enum chip_class cc
);
48 static sb_hw_chip
translate_chip(enum radeon_family rf
);
// Creates an sb_context for the given r600 context.
//
// Steps visible here:
//   1. allocate a new sb_context;
//   2. call init() on it with rctx->isa, the chip translated by
//      translate_chip(rctx->family) and the chip class translated by
//      translate_chip_class(rctx->chip_class);
//   3. load the class-level debug switches of sb_context (they are assigned
//      through the sb_context:: qualifier, not through the new object) from
//      the screen debug flags and from the R600_SB_DSKIP_* debug options.
//
// NOTE(review): the body of the `if (sctx->init(...))` branch and the return
// statement are not visible in this listing. Presumably a non-zero init()
// result is a failure and the function returns sctx - confirm against the
// full file.
50 sb_context
*r600_sb_context_create(struct r600_context
*rctx
) {
52 sb_context
*sctx
= new sb_context();
// The branch below is entered when init() returns a non-zero / true value.
54 if (sctx
->init(rctx
->isa
, translate_chip(rctx
->family
),
55 translate_chip_class(rctx
->chip_class
))) {
// Each switch below is set from one DBG_SB_* bit of the screen debug flags.
60 unsigned df
= rctx
->screen
->debug_flags
;
62 sb_context::dump_pass
= df
& DBG_SB_DUMP
;
63 sb_context::dump_stat
= df
& DBG_SB_STAT
;
64 sb_context::dry_run
= df
& DBG_SB_DRY_RUN
;
65 sb_context::no_fallback
= df
& DBG_SB_NO_FALLBACK
;
66 sb_context::safe_math
= df
& DBG_SB_SAFEMATH
;
// Shader-skipping controls, read via debug_get_num_option with default 0.
// How start/end/mode are interpreted is described by the "skip some shaders"
// comment in r600_sb_bytecode_process.
68 sb_context::dskip_start
= debug_get_num_option("R600_SB_DSKIP_START", 0);
69 sb_context::dskip_end
= debug_get_num_option("R600_SB_DSKIP_END", 0);
70 sb_context::dskip_mode
= debug_get_num_option("R600_SB_DSKIP_MODE", 0);
// Tear-down entry point for a context created by r600_sb_context_create.
//
// sctx is passed as an untyped pointer and is cast back to sb_context*.
// When sb_context::dump_stat is set, the context-wide statistics are written
// to sblog: the source stats, the optimized stats, and the difference
// between the two.
//
// NOTE(review): the statement that actually releases the context, and any
// null check on sctx, are not visible in this listing - confirm against the
// full file.
75 void r600_sb_context_destroy(void * sctx
) {
77 sb_context
*ctx
= static_cast<sb_context
*>(sctx
);
// Statistics output is opt-in through the dump_stat debug switch.
79 if (sb_context::dump_stat
) {
80 sblog
<< "\ncontext src stats: ";
81 ctx
->src_stats
.dump();
82 sblog
<< "context opt stats: ";
83 ctx
->opt_stats
.dump();
84 sblog
<< "context diff: ";
85 ctx
->src_stats
.dump_diff(ctx
->opt_stats
);
// Runs the sb pipeline over one shader's bytecode.
//
// Visible flow:
//   1. fetch the sb_context cached in rctx->sb_context (one is created via
//      r600_sb_context_create and stored back into rctx);
//   2. decode `bc` with bc_parser and obtain the shader object;
//   3. optionally skip the shader according to the dskip_* settings;
//   4. prepare the parser, then run the passes in the fixed order
//      below through SB_RUN_PASS;
//   5. build new bytecode with bc_builder and, unless sb_context::dry_run is
//      set, write it into a freshly malloc'ed bc->bytecode buffer and copy
//      sh->nstack to bc->nstack;
//   6. when sb_context::dump_stat is set, log timing and per-shader stats.
//
// Parameters visible here: rctx (driver context), bc (bytecode to process,
// updated in place), pshader (forwarded to bc_parser).
// NOTE(review): the rest of the parameter list, the declaration of `r`, the
// return statements and several guarding conditions / closing braces are not
// visible in this listing - confirm details against the full file.
92 int r600_sb_bytecode_process(struct r600_context
*rctx
,
93 struct r600_bytecode
*bc
,
94 struct r600_shader
*pshader
,
// shader_id is used in the log messages and in the dskip range check.
98 unsigned shader_id
= bc
->debug_id
;
// The cached context pointer is stored untyped in rctx, hence the cast.
100 sb_context
*ctx
= (sb_context
*)rctx
->sb_context
;
// Create a context and cache it in rctx. Presumably this only happens when
// no context was cached yet; the guarding condition is not visible here.
102 rctx
->sb_context
= ctx
= r600_sb_context_create(rctx
);
// The start time is only sampled when statistics output is enabled.
105 int64_t time_start
= 0;
106 if (sb_context::dump_stat
) {
107 time_start
= os_time_get_nano();
110 SB_DUMP_STAT( sblog
<< "\nsb: shader " << shader_id
<< "\n"; );
// Decode the incoming bytecode; a non-zero result from decode() trips the
// assert below.
112 bc_parser
parser(*ctx
, bc
, pshader
);
114 if ((r
= parser
.decode())) {
115 assert(!"sb: bytecode decoding error");
119 shader
*sh
= parser
.get_shader();
// Run bc_dump over the input bytecode (bc->bytecode, bc->ndw).
122 bc_dump(*sh
, bc
->bytecode
, bc
->ndw
).run();
// For every target except TARGET_FETCH, record the source size (ndw) and
// call collect_stats(false); the matching call after optimization passes
// `true`.
130 if (sh
->target
!= TARGET_FETCH
) {
131 sh
->src_stats
.ndw
= bc
->ndw
;
132 sh
->collect_stats(false);
135 /* skip some shaders (use shaders from default backend)
136 * dskip_start - range start, dskip_end - range_end,
137 * e.g. start = 5, end = 6 means shaders 5 & 6
139 * dskip_mode == 0 - disabled,
140 * dskip_mode == 1 - don't process the shaders from the [start;end] range
141 * dskip_mode == 2 - process only the shaders from the range
// The comparison `in_range == (mode == 1)` is true when mode is 1 and the
// shader is inside the range, or when mode is not 1 and the shader is
// outside the range; in both cases the shader is reported as skipped.
143 if (sb_context::dskip_mode
) {
144 if ((sb_context::dskip_start
<= shader_id
&&
145 shader_id
<= sb_context::dskip_end
) ==
146 (sb_context::dskip_mode
== 1)) {
147 sblog
<< "sb: skipped shader " << shader_id
<< " : " << "["
148 << sb_context::dskip_start
<< "; "
149 << sb_context::dskip_end
<< "] mode "
150 << sb_context::dskip_mode
<< "\n";
// A non-zero result from prepare() trips the assert below.
155 if ((r
= parser
.prepare())) {
156 assert(!"sb: bytecode parsing error");
160 SB_DUMP_PASS( sblog
<< "\n\n###### after parse\n"; sh
->dump_ir(); );
// SB_RUN_PASS(n, dump): only part of the macro body is visible in this
// listing. The visible lines log an error that names the pass (#n) together
// with the result r, test sb_context::no_fallback, log that unoptimized
// bytecode is being used, and emit an "after <pass>" banner under
// SB_DUMP_PASS.
162 #define SB_RUN_PASS(n, dump) \
166 sblog << "sb: error (" << r << ") in the " << #n << " pass.\n"; \
167 if (sb_context::no_fallback) \
169 sblog << "sb: using unoptimized bytecode...\n"; \
174 SB_DUMP_PASS( sblog << "\n\n###### after " << #n << "\n"; \
// Pass pipeline. The order of the calls below is significant; the second
// macro argument is the `dump` parameter of SB_RUN_PASS.
180 SB_RUN_PASS(ssa_prepare
, 0);
181 SB_RUN_PASS(ssa_rename
, 1);
// psi_ops only runs for shaders flagged with has_alu_predication.
183 if (sh
->has_alu_predication
)
184 SB_RUN_PASS(psi_ops
, 1);
186 SB_RUN_PASS(liveness
, 0);
187 SB_RUN_PASS(dce_cleanup
, 0);
188 SB_RUN_PASS(def_use
, 0);
// Hand the root node's live_before set to set_undef().
190 sh
->set_undef(sh
->root
->live_before
);
192 SB_RUN_PASS(if_conversion
, 1);
194 // if_conversion breaks info about uses, but next pass (peephole)
195 // doesn't need it, so we can skip def/use update here
196 // until it's really required
197 //SB_RUN_PASS(def_use, 0);
199 SB_RUN_PASS(peephole
, 1);
200 SB_RUN_PASS(def_use
, 0);
204 SB_RUN_PASS(liveness
, 0);
205 SB_RUN_PASS(dce_cleanup
, 1);
206 SB_RUN_PASS(def_use
, 0);
208 SB_RUN_PASS(ra_split
, 0);
209 SB_RUN_PASS(def_use
, 0);
211 // create 'basic blocks'. it's not like we build CFG, they are just
212 // container nodes in the correct locations for code placement
// compute_interferences is switched on before this liveness run, presumably
// so that interference data is available to the ra_* passes that follow.
217 sh
->compute_interferences
= true;
218 SB_RUN_PASS(liveness
, 0);
220 SB_RUN_PASS(ra_coalesce
, 1);
221 SB_RUN_PASS(ra_init
, 1);
223 SB_RUN_PASS(post_scheduler
, 1);
227 #if SB_RA_SCHED_CHECK
228 // check code correctness after regalloc/scheduler
229 SB_RUN_PASS(ra_checker
, 0);
232 SB_RUN_PASS(bc_finalizer
, 0);
// Mark the shader as optimized before the new bytecode is built.
234 sh
->optimized
= true;
// Build the new bytecode; the branch below is entered when build() returns
// a non-zero result (its body is not visible in this listing).
236 bc_builder
builder(*sh
);
238 if ((r
= builder
.build())) {
243 bytecode
&nbc
= builder
.get_bytecode();
// Run bc_dump over the newly built bytecode.
246 bc_dump(*sh
, &nbc
).run();
// Unless this is a dry run, replace the caller's bytecode: allocate
// bc->ndw << 2 bytes (4 bytes per ndw unit), let nbc write into the buffer,
// and copy the stack size from the shader.
// NOTE(review): no null check on the malloc result is visible here, and the
// statements that update bc->ndw / release the old buffer are not part of
// this listing - confirm against the full file.
249 if (!sb_context::dry_run
) {
253 bc
->bytecode
= (uint32_t*) malloc(bc
->ndw
<< 2);
254 nbc
.write_data(bc
->bytecode
);
257 bc
->nstack
= sh
->nstack
;
259 SB_DUMP_STAT( sblog
<< "sb: dry run: optimized bytecode is not used\n"; );
// Statistics: elapsed time is measured in nanoseconds and printed in
// milliseconds (divided by 1000000.0), followed by source stats, optimized
// stats and their diff for this shader.
262 if (sb_context::dump_stat
) {
263 int64_t t
= os_time_get_nano() - time_start
;
265 sblog
<< "sb: processing shader " << shader_id
<< " done ( "
266 << ((double)t
)/1000000.0 << " ms ).\n";
268 sh
->opt_stats
.ndw
= bc
->ndw
;
269 sh
->collect_stats(true);
271 sblog
<< "src stats: ";
272 sh
->src_stats
.dump();
273 sblog
<< "opt stats: ";
274 sh
->opt_stats
.dump();
276 sh
->src_stats
.dump_diff(sh
->opt_stats
);
// Maps a driver radeon_family value to the corresponding sb_hw_chip value.
//
// Every family listed below maps to the HW_CHIP_ constant with the same
// suffix. A value that matches none of the cases reaches the assert and, if
// the assert does not stop execution, yields HW_CHIP_UNKNOWN.
//
// NOTE(review): the switch header and the default label are not visible in
// this listing.
283 static sb_hw_chip
translate_chip(enum radeon_family rf
) {
// TRANSLATE_CHIP(X) expands to `case CHIP_X: return HW_CHIP_X` via token
// pasting; the macro is local to this function and undefined right after
// the list.
286 #define TRANSLATE_CHIP(c) case CHIP_##c: return HW_CHIP_##c
287 TRANSLATE_CHIP(R600
);
288 TRANSLATE_CHIP(RV610
);
289 TRANSLATE_CHIP(RV630
);
290 TRANSLATE_CHIP(RV670
);
291 TRANSLATE_CHIP(RV620
);
292 TRANSLATE_CHIP(RV635
);
293 TRANSLATE_CHIP(RS780
);
294 TRANSLATE_CHIP(RS880
);
295 TRANSLATE_CHIP(RV770
);
296 TRANSLATE_CHIP(RV730
);
297 TRANSLATE_CHIP(RV710
);
298 TRANSLATE_CHIP(RV740
);
299 TRANSLATE_CHIP(CEDAR
);
300 TRANSLATE_CHIP(REDWOOD
);
301 TRANSLATE_CHIP(JUNIPER
);
302 TRANSLATE_CHIP(CYPRESS
);
303 TRANSLATE_CHIP(HEMLOCK
);
304 TRANSLATE_CHIP(PALM
);
305 TRANSLATE_CHIP(SUMO
);
306 TRANSLATE_CHIP(SUMO2
);
307 TRANSLATE_CHIP(BARTS
);
308 TRANSLATE_CHIP(TURKS
);
309 TRANSLATE_CHIP(CAICOS
);
310 TRANSLATE_CHIP(CAYMAN
);
311 TRANSLATE_CHIP(ARUBA
);
312 #undef TRANSLATE_CHIP
// Fallback for a family that is not in the list above.
315 assert(!"unknown chip");
316 return HW_CHIP_UNKNOWN
;
// Maps a driver chip_class value to the corresponding sb_hw_class value:
// R600, R700, EVERGREEN and CAYMAN each map to the HW_CLASS_ constant with
// the same suffix. Any other value reaches the assert and, if the assert
// does not stop execution, yields HW_CLASS_UNKNOWN.
//
// NOTE(review): the switch header and the default label are not visible in
// this listing.
320 static sb_hw_class
translate_chip_class(enum chip_class cc
) {
322 case R600
: return HW_CLASS_R600
;
323 case R700
: return HW_CLASS_R700
;
324 case EVERGREEN
: return HW_CLASS_EVERGREEN
;
325 case CAYMAN
: return HW_CLASS_CAYMAN
;
// Fallback for a chip class that is not handled above.
328 assert(!"unknown chip class");
329 return HW_CLASS_UNKNOWN
;