b919fa419d4e2c31cde9aeed00a7ddd1a60eb641
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
// Takes the value of DEBUG. It is tested by the `#if SB_RA_SCHED_CHECK`
// guard in r600_sb_bytecode_process(), which precedes the ra_checker pass.
27 #define SB_RA_SCHED_CHECK DEBUG
30 #include "os/os_time.h"
31 #include "r600_pipe.h"
32 #include "r600_shader.h"
34 #include "sb_public.h"
42 #include "sb_shader.h"
46 using namespace r600_sb
;
// Forward declarations of the two file-local translation helpers. Both are
// called from r600_sb_context_create() below, ahead of their definitions at
// the end of this file.
50 static sb_hw_class
translate_chip_class(enum chip_class cc
);
51 static sb_hw_chip
translate_chip(enum radeon_family rf
);
// Allocates a new sb_context for the given r600 context and initializes it.
//
// rctx: supplies the ISA description (rctx->isa), the chip family and chip
//       class (both translated to the sb enums before being passed to
//       init()), and the screen whose debug_flags are read below.
//
// Side effect: overwrites the static sb_context settings (dump_pass,
// dump_stat, dry_run, no_fallback, dskip_start, dskip_end, dskip_mode).
//
// NOTE(review): the body of the init() check and the function's return
// statement are not visible in this view. Presumably a nonzero init() result
// is the failure path and the new context is what gets returned - confirm
// against the complete file.
53 sb_context
*r600_sb_context_create(struct r600_context
*rctx
) {
55 sb_context
*sctx
= new sb_context();
// init() is given the ISA plus the translated chip and chip class.
57 if (sctx
->init(rctx
->isa
, translate_chip(rctx
->family
),
58 translate_chip_class(rctx
->chip_class
))) {
// Debug flags are taken from the screen this context belongs to.
63 unsigned df
= rctx
->screen
->debug_flags
;
// These are assigned through sb_context:: (static members), so the values
// are not per-context. Each one is a single DBG_SB_* bit masked out of df.
65 sb_context::dump_pass
= df
& DBG_SB_DUMP
;
66 sb_context::dump_stat
= df
& DBG_SB_STAT
;
67 sb_context::dry_run
= df
& DBG_SB_DRY_RUN
;
68 sb_context::no_fallback
= df
& DBG_SB_NO_FALLBACK
;
// Shader-skipping controls, each read with a default of 0. Their meaning is
// described by the dskip comment in r600_sb_bytecode_process().
70 sb_context::dskip_start
= debug_get_num_option("R600_SB_DSKIP_START", 0);
71 sb_context::dskip_end
= debug_get_num_option("R600_SB_DSKIP_END", 0);
72 sb_context::dskip_mode
= debug_get_num_option("R600_SB_DSKIP_MODE", 0);
// Takes the opaque context pointer and treats it as an sb_context.
//
// When sb_context::dump_stat is set, writes three lines of statistics to
// cerr: the context's src_stats, its opt_stats, and the difference between
// the two.
//
// NOTE(review): the statement that actually releases the context is not
// visible in this view - confirm against the complete file.
77 void r600_sb_context_destroy(void * sctx
) {
// The public entry point takes void*; recover the concrete type.
79 sb_context
*ctx
= static_cast<sb_context
*>(sctx
);
81 if (sb_context::dump_stat
) {
82 cerr
<< "context src stats: ";
83 ctx
->src_stats
.dump(cerr
);
84 cerr
<< "context opt stats: ";
85 ctx
->opt_stats
.dump(cerr
);
// dump_diff() is called on the source stats with the optimized stats as
// its argument.
86 cerr
<< "context diff: ";
87 ctx
->src_stats
.dump_diff(cerr
, ctx
->opt_stats
);
// Runs the sb backend over the bytecode of one shader.
//
// Steps visible in this view, in order:
//   1. fetch the sb_context stored in rctx->sb_context, creating one through
//      r600_sb_context_create() and storing it back (presumably only when
//      none exists yet - the guarding condition is not shown);
//   2. parse `bc` with bc_parser;
//   3. optionally skip the shader according to the dskip_* settings;
//   4. run the pass pipeline through SB_RUN_PASS;
//   5. build the new bytecode with bc_builder and, unless dry_run is set,
//      write it into bc->bytecode and update bc->nstack;
//   6. when dump_stat is set, print the elapsed time and statistics to cerr.
//
// NOTE(review): this view is missing lines - part of the parameter list
// (`optimize` is used below but its declaration is not shown), the
// declaration of `r`, error-path bodies, return statements and closing
// braces. The comments here only describe what is visible.
94 int r600_sb_bytecode_process(struct r600_context
*rctx
,
95 struct r600_bytecode
*bc
,
96 struct r600_shader
*pshader
,
97 int dump_source_bytecode
,
// The bytecode's debug id identifies the shader in messages and in the
// dskip range check.
100 unsigned shader_id
= bc
->debug_id
;
102 sb_context
*ctx
= (sb_context
*)rctx
->sb_context
;
104 rctx
->sb_context
= ctx
= r600_sb_context_create(rctx
);
// The start time is only sampled when statistics are requested; it is
// consumed by the timing report near the end of the function.
107 int64_t time_start
= 0;
108 if (sb_context::dump_stat
) {
109 time_start
= os_time_get_nano();
112 SB_DUMP_STAT( cerr
<< "\nsb: shader " << shader_id
<< "\n"; );
114 bc_parser
parser(*ctx
, bc
, pshader
, dump_source_bytecode
, optimize
);
// A nonzero result from parse() enters the branch below; its body is not
// visible here.
116 if ((r
= parser
.parse())) {
121 /* skip some shaders (use shaders from default backend)
122 * dskip_start - range start, dskip_end - range_end,
123 * e.g. start = 5, end = 6 means shaders 5 & 6
125 * dskip_mode == 0 - disabled,
126 * dskip_mode == 1 - don't process the shaders from the [start;end] range
127 * dskip_mode == 2 - process only the shaders from the range
129 if (sb_context::dskip_mode
) {
// Skip when "shader is inside the range" equals "mode is 1": inside the
// range for mode 1, outside the range for any other nonzero mode.
130 if ((sb_context::dskip_start
<= shader_id
&&
131 shader_id
<= sb_context::dskip_end
) ==
132 (sb_context::dskip_mode
== 1)) {
133 cerr
<< "sb: skipped shader " << shader_id
<< " : " << "["
134 << sb_context::dskip_start
<< "; "
135 << sb_context::dskip_end
<< "] mode "
136 << sb_context::dskip_mode
<< "\n";
141 shader
*sh
= parser
.get_shader();
142 SB_DUMP_PASS( cerr
<< "\n\n###### after parse\n"; sh
->dump_ir(); );
149 #define SB_RUN_PASS(n, dump) \
153 cerr << "sb: error (" << r << ") in the " << #n << " pass.\n"; \
154 if (sb_context::no_fallback) \
156 cerr << "sb: using unoptimized bytecode...\n"; \
161 SB_DUMP_PASS( cerr << "\n\n###### after " << #n << "\n"; \
167 SB_RUN_PASS(ssa_prepare
, 0);
168 SB_RUN_PASS(ssa_rename
, 1);
// psi_ops only runs for shaders flagged as using ALU predication.
170 if (sh
->has_alu_predication
)
171 SB_RUN_PASS(psi_ops
, 1);
173 SB_RUN_PASS(liveness
, 0);
174 SB_RUN_PASS(dce_cleanup
, 0);
175 SB_RUN_PASS(def_use
, 0);
// set_undef() is passed the live_before set of the shader's root node.
177 sh
->set_undef(sh
->root
->live_before
);
179 SB_RUN_PASS(peephole
, 1);
180 SB_RUN_PASS(if_conversion
, 1);
182 SB_RUN_PASS(def_use
, 0);
186 SB_RUN_PASS(liveness
, 0);
187 SB_RUN_PASS(dce_cleanup
, 1);
188 SB_RUN_PASS(def_use
, 0);
190 SB_RUN_PASS(ra_split
, 0);
191 SB_RUN_PASS(def_use
, 0);
193 // create 'basic blocks'. it's not like we build CFG, they are just
194 // container nodes in the correct locations for code placement
// compute_interferences is switched on just before this liveness run.
199 sh
->compute_interferences
= true;
200 SB_RUN_PASS(liveness
, 0);
202 SB_RUN_PASS(ra_coalesce
, 1);
203 SB_RUN_PASS(ra_init
, 1);
205 SB_RUN_PASS(post_scheduler
, 1);
209 #if SB_RA_SCHED_CHECK
210 // check code correctness after regalloc/scheduler
211 SB_RUN_PASS(ra_checker
, 0);
214 SB_RUN_PASS(bc_finalizer
, 0);
216 sh
->optimized
= true;
218 bc_builder
builder(*sh
);
// A nonzero result from build() enters the branch below; its body is not
// visible here.
220 if ((r
= builder
.build())) {
// The caller's bytecode is only replaced when this is not a dry run.
225 if (!sb_context::dry_run
) {
226 bytecode
&nbc
= builder
.get_bytecode();
// The buffer is sized as ndw << 2 bytes, i.e. four bytes per dword.
// NOTE(review): no null check on the malloc() result is visible before
// write_data() uses it - confirm against the complete file.
230 bc
->bytecode
= (uint32_t*) malloc(bc
->ndw
<< 2);
231 nbc
.write_data(bc
->bytecode
);
234 bc
->nstack
= sh
->nstack
;
236 SB_DUMP_STAT( cerr
<< "SB_USE_NEW_BYTECODE is not enabled\n"; );
240 if (sb_context::dump_stat
) {
// Elapsed time is measured in nanoseconds and printed in milliseconds.
241 int64_t t
= os_time_get_nano() - time_start
;
243 cerr
<< "sb: processing shader " << shader_id
<< " done ( "
244 << ((double)t
)/1000000.0 << " ms ).\n";
// The optimized dword count is copied from bc->ndw before
// collect_stats(true) is called.
246 sh
->opt_stats
.ndw
= bc
->ndw
;
247 sh
->collect_stats(true);
249 cerr
<< "src stats: ";
250 sh
->src_stats
.dump(cerr
);
251 cerr
<< "opt stats: ";
252 sh
->opt_stats
.dump(cerr
);
254 sh
->src_stats
.dump_diff(cerr
, sh
->opt_stats
);
// Maps a radeon_family value to the sb_hw_chip value of the same name.
//
// rf: the chip family to translate.
// Returns HW_CHIP_<name> for each family listed below. After the case list
// the function asserts "unknown chip" and returns HW_CHIP_UNKNOWN.
//
// NOTE(review): the enclosing switch statement and its closing braces are
// not visible in this view.
261 static sb_hw_chip
translate_chip(enum radeon_family rf
) {
// TRANSLATE_CHIP(X) expands to `case CHIP_X: return HW_CHIP_X`, so each line
// below handles one chip family.
264 #define TRANSLATE_CHIP(c) case CHIP_##c: return HW_CHIP_##c
265 TRANSLATE_CHIP(R600
);
266 TRANSLATE_CHIP(RV610
);
267 TRANSLATE_CHIP(RV630
);
268 TRANSLATE_CHIP(RV670
);
269 TRANSLATE_CHIP(RV620
);
270 TRANSLATE_CHIP(RV635
);
271 TRANSLATE_CHIP(RS780
);
272 TRANSLATE_CHIP(RS880
);
273 TRANSLATE_CHIP(RV770
);
274 TRANSLATE_CHIP(RV730
);
275 TRANSLATE_CHIP(RV710
);
276 TRANSLATE_CHIP(RV740
);
277 TRANSLATE_CHIP(CEDAR
);
278 TRANSLATE_CHIP(REDWOOD
);
279 TRANSLATE_CHIP(JUNIPER
);
280 TRANSLATE_CHIP(CYPRESS
);
281 TRANSLATE_CHIP(HEMLOCK
);
282 TRANSLATE_CHIP(PALM
);
283 TRANSLATE_CHIP(SUMO
);
284 TRANSLATE_CHIP(SUMO2
);
285 TRANSLATE_CHIP(BARTS
);
286 TRANSLATE_CHIP(TURKS
);
287 TRANSLATE_CHIP(CAICOS
);
288 TRANSLATE_CHIP(CAYMAN
);
289 #undef TRANSLATE_CHIP
// The string literal is non-null, so !"..." is false and the assert always
// fires when reached. The return after it is what executes when assert() is
// compiled out.
292 assert(!"unknown chip");
293 return HW_CHIP_UNKNOWN
;
// Maps a chip_class value to the corresponding sb_hw_class value.
//
// cc: the chip class to translate.
// Returns HW_CLASS_R600, HW_CLASS_R700, HW_CLASS_EVERGREEN or
// HW_CLASS_CAYMAN for the matching input. After the case list the function
// asserts "unknown chip class" and returns HW_CLASS_UNKNOWN.
//
// NOTE(review): the enclosing switch statement and its closing braces are
// not visible in this view.
297 static sb_hw_class
translate_chip_class(enum chip_class cc
) {
299 case R600
: return HW_CLASS_R600
;
300 case R700
: return HW_CLASS_R700
;
301 case EVERGREEN
: return HW_CLASS_EVERGREEN
;
302 case CAYMAN
: return HW_CLASS_CAYMAN
;
// The assert always fires when reached; the return after it is what executes
// when assert() is compiled out.
305 assert(!"unknown chip class");
306 return HW_CLASS_UNKNOWN
;