lima/gpir: Optimize conditional break/continue
[mesa.git] / src / gallium / drivers / lima / lima_program.c
1 /*
2 * Copyright (c) 2017-2019 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/u_memory.h"
26 #include "util/ralloc.h"
27 #include "util/u_debug.h"
28
29 #include "tgsi/tgsi_dump.h"
30 #include "compiler/nir/nir.h"
31 #include "nir/tgsi_to_nir.h"
32
33 #include "pipe/p_state.h"
34
35 #include "lima_screen.h"
36 #include "lima_context.h"
37 #include "lima_job.h"
38 #include "lima_program.h"
39 #include "lima_bo.h"
40 #include "ir/lima_ir.h"
41
42 static const nir_shader_compiler_options vs_nir_options = {
43 .lower_ffma = true,
44 .lower_fpow = true,
45 .lower_ffract = true,
46 .lower_fdiv = true,
47 .lower_fmod = true,
48 .lower_fsqrt = true,
49 .lower_sub = true,
50 .lower_flrp32 = true,
51 .lower_flrp64 = true,
52 .lower_ftrunc = true,
53 /* could be implemented by clamp */
54 .lower_fsat = true,
55 .lower_bitops = true,
56 .lower_rotate = true,
57 .lower_sincos = true,
58 .lower_fceil = true,
59 };
60
61 static const nir_shader_compiler_options fs_nir_options = {
62 .lower_ffma = true,
63 .lower_fpow = true,
64 .lower_fdiv = true,
65 .lower_fmod = true,
66 .lower_sub = true,
67 .lower_flrp32 = true,
68 .lower_flrp64 = true,
69 .lower_fsign = true,
70 .lower_rotate = true,
71 .lower_fdot = true,
72 .lower_fdph = true,
73 .lower_bitops = true,
74 .lower_vector_cmp = true,
75 };
76
77 static const struct nir_lower_tex_options tex_options = {
78 .lower_txp = ~0u,
79 };
80
81 const void *
82 lima_program_get_compiler_options(enum pipe_shader_type shader)
83 {
84 switch (shader) {
85 case PIPE_SHADER_VERTEX:
86 return &vs_nir_options;
87 case PIPE_SHADER_FRAGMENT:
88 return &fs_nir_options;
89 default:
90 return NULL;
91 }
92 }
93
/* Size callback given to nir_lower_io: the size of a type is its
 * attribute slot count. The bindless argument is not consulted.
 */
static int
type_size(const struct glsl_type *type, bool bindless)
{
   const int slots = glsl_count_attribute_slots(type, false);

   return slots;
}
99
100 void
101 lima_program_optimize_vs_nir(struct nir_shader *s)
102 {
103 bool progress;
104
105 NIR_PASS_V(s, nir_lower_viewport_transform);
106 NIR_PASS_V(s, nir_lower_point_size, 1.0f, 100.0f);
107 NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size, 0);
108 NIR_PASS_V(s, nir_lower_load_const_to_scalar);
109 NIR_PASS_V(s, lima_nir_lower_uniform_to_scalar);
110 NIR_PASS_V(s, nir_lower_io_to_scalar,
111 nir_var_shader_in|nir_var_shader_out);
112
113 do {
114 progress = false;
115
116 NIR_PASS_V(s, nir_lower_vars_to_ssa);
117 NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
118 NIR_PASS(progress, s, nir_lower_phis_to_scalar);
119 NIR_PASS(progress, s, nir_copy_prop);
120 NIR_PASS(progress, s, nir_opt_remove_phis);
121 NIR_PASS(progress, s, nir_opt_dce);
122 NIR_PASS(progress, s, nir_opt_dead_cf);
123 NIR_PASS(progress, s, nir_opt_cse);
124 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
125 NIR_PASS(progress, s, nir_opt_algebraic);
126 NIR_PASS(progress, s, nir_opt_constant_folding);
127 NIR_PASS(progress, s, nir_opt_undef);
128 NIR_PASS(progress, s, nir_opt_loop_unroll,
129 nir_var_shader_in |
130 nir_var_shader_out |
131 nir_var_function_temp);
132 } while (progress);
133
134 NIR_PASS_V(s, nir_lower_int_to_float);
135 /* Run opt_algebraic between int_to_float and bool_to_float because
136 * int_to_float emits ftrunc, and ftrunc lowering generates bool ops
137 */
138 do {
139 progress = false;
140 NIR_PASS(progress, s, nir_opt_algebraic);
141 } while (progress);
142
143 NIR_PASS_V(s, nir_lower_bool_to_float);
144
145 NIR_PASS_V(s, nir_copy_prop);
146 NIR_PASS_V(s, nir_opt_dce);
147 NIR_PASS_V(s, nir_lower_locals_to_regs);
148 NIR_PASS_V(s, nir_convert_from_ssa, true);
149 NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp);
150 nir_sweep(s);
151 }
152
153 static bool
154 lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
155 {
156 if (instr->type != nir_instr_type_alu)
157 return false;
158
159 nir_alu_instr *alu = nir_instr_as_alu(instr);
160 switch (alu->op) {
161 case nir_op_frcp:
162 case nir_op_frsq:
163 case nir_op_flog2:
164 case nir_op_fexp2:
165 case nir_op_fsqrt:
166 case nir_op_fsin:
167 case nir_op_fcos:
168 return true;
169 default:
170 break;
171 }
172
173 /* nir vec4 fcsel assumes that each component of the condition will be
174 * used to select the same component from the two options, but Utgard PP
175 * has only 1 component condition. If all condition components are not the
176 * same we need to lower it to scalar.
177 */
178 switch (alu->op) {
179 case nir_op_bcsel:
180 case nir_op_fcsel:
181 break;
182 default:
183 return false;
184 }
185
186 int num_components = nir_dest_num_components(alu->dest.dest);
187
188 uint8_t swizzle = alu->src[0].swizzle[0];
189
190 for (int i = 1; i < num_components; i++)
191 if (alu->src[0].swizzle[i] != swizzle)
192 return true;
193
194 return false;
195 }
196
197 void
198 lima_program_optimize_fs_nir(struct nir_shader *s)
199 {
200 bool progress;
201
202 NIR_PASS_V(s, nir_lower_fragcoord_wtrans);
203 NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size, 0);
204 NIR_PASS_V(s, nir_lower_regs_to_ssa);
205 NIR_PASS_V(s, nir_lower_tex, &tex_options);
206
207 do {
208 progress = false;
209 NIR_PASS(progress, s, nir_opt_vectorize);
210 } while (progress);
211
212 do {
213 progress = false;
214
215 NIR_PASS_V(s, nir_lower_vars_to_ssa);
216 NIR_PASS(progress, s, nir_lower_alu_to_scalar, lima_alu_to_scalar_filter_cb, NULL);
217 NIR_PASS(progress, s, nir_copy_prop);
218 NIR_PASS(progress, s, nir_opt_remove_phis);
219 NIR_PASS(progress, s, nir_opt_dce);
220 NIR_PASS(progress, s, nir_opt_dead_cf);
221 NIR_PASS(progress, s, nir_opt_cse);
222 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
223 NIR_PASS(progress, s, nir_opt_algebraic);
224 NIR_PASS(progress, s, nir_opt_constant_folding);
225 NIR_PASS(progress, s, nir_opt_undef);
226 NIR_PASS(progress, s, nir_opt_loop_unroll,
227 nir_var_shader_in |
228 nir_var_shader_out |
229 nir_var_function_temp);
230 NIR_PASS(progress, s, lima_nir_split_load_input);
231 } while (progress);
232
233 NIR_PASS_V(s, nir_lower_int_to_float);
234 NIR_PASS_V(s, nir_lower_bool_to_float);
235
236 /* Some ops must be lowered after being converted from int ops,
237 * so re-run nir_opt_algebraic after int lowering. */
238 do {
239 progress = false;
240 NIR_PASS(progress, s, nir_opt_algebraic);
241 } while (progress);
242
243 /* Must be run after optimization loop */
244 NIR_PASS_V(s, lima_nir_scale_trig);
245
246 /* Lower modifiers */
247 NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_all_source_mods);
248 NIR_PASS_V(s, nir_copy_prop);
249 NIR_PASS_V(s, nir_opt_dce);
250
251 NIR_PASS_V(s, nir_lower_locals_to_regs);
252 NIR_PASS_V(s, nir_convert_from_ssa, true);
253 NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp);
254
255 NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
256 NIR_PASS_V(s, nir_lower_vec_to_movs);
257
258 nir_sweep(s);
259 }
260
261 static void *
262 lima_create_fs_state(struct pipe_context *pctx,
263 const struct pipe_shader_state *cso)
264 {
265 struct lima_context *ctx = lima_context(pctx);
266 struct lima_screen *screen = lima_screen(pctx->screen);
267 struct lima_fs_shader_state *so = rzalloc(NULL, struct lima_fs_shader_state);
268
269 if (!so)
270 return NULL;
271
272 nir_shader *nir;
273 if (cso->type == PIPE_SHADER_IR_NIR)
274 nir = cso->ir.nir;
275 else {
276 assert(cso->type == PIPE_SHADER_IR_TGSI);
277
278 nir = tgsi_to_nir(cso->tokens, pctx->screen);
279 }
280
281 lima_program_optimize_fs_nir(nir);
282
283 if (lima_debug & LIMA_DEBUG_PP)
284 nir_print_shader(nir, stdout);
285
286 if (!ppir_compile_nir(so, nir, screen->pp_ra, &ctx->debug)) {
287 ralloc_free(so);
288 return NULL;
289 }
290
291 so->uses_discard = nir->info.fs.uses_discard;
292
293 return so;
294 }
295
296 static void
297 lima_bind_fs_state(struct pipe_context *pctx, void *hwcso)
298 {
299 struct lima_context *ctx = lima_context(pctx);
300
301 ctx->fs = hwcso;
302 ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_FRAG;
303 }
304
305 static void
306 lima_delete_fs_state(struct pipe_context *pctx, void *hwcso)
307 {
308 struct lima_fs_shader_state *so = hwcso;
309
310 if (so->bo)
311 lima_bo_unreference(so->bo);
312
313 ralloc_free(so);
314 }
315
316 bool
317 lima_update_vs_state(struct lima_context *ctx)
318 {
319 struct lima_vs_shader_state *vs = ctx->vs;
320 if (!vs->bo) {
321 struct lima_screen *screen = lima_screen(ctx->base.screen);
322 vs->bo = lima_bo_create(screen, vs->shader_size, 0);
323 if (!vs->bo) {
324 fprintf(stderr, "lima: create vs shader bo fail\n");
325 return false;
326 }
327
328 memcpy(lima_bo_map(vs->bo), vs->shader, vs->shader_size);
329 ralloc_free(vs->shader);
330 vs->shader = NULL;
331 }
332
333 return true;
334 }
335
336 bool
337 lima_update_fs_state(struct lima_context *ctx)
338 {
339 struct lima_fs_shader_state *fs = ctx->fs;
340 if (!fs->bo) {
341 struct lima_screen *screen = lima_screen(ctx->base.screen);
342 fs->bo = lima_bo_create(screen, fs->shader_size, 0);
343 if (!fs->bo) {
344 fprintf(stderr, "lima: create fs shader bo fail\n");
345 return false;
346 }
347
348 memcpy(lima_bo_map(fs->bo), fs->shader, fs->shader_size);
349 ralloc_free(fs->shader);
350 fs->shader = NULL;
351 }
352
353 struct lima_job *job = lima_job_get(ctx);
354 job->pp_max_stack_size = MAX2(job->pp_max_stack_size, ctx->fs->stack_size);
355
356 return true;
357 }
358
359 static void *
360 lima_create_vs_state(struct pipe_context *pctx,
361 const struct pipe_shader_state *cso)
362 {
363 struct lima_context *ctx = lima_context(pctx);
364 struct lima_vs_shader_state *so = rzalloc(NULL, struct lima_vs_shader_state);
365
366 if (!so)
367 return NULL;
368
369 nir_shader *nir;
370 if (cso->type == PIPE_SHADER_IR_NIR)
371 nir = cso->ir.nir;
372 else {
373 assert(cso->type == PIPE_SHADER_IR_TGSI);
374
375 nir = tgsi_to_nir(cso->tokens, pctx->screen);
376 }
377
378 lima_program_optimize_vs_nir(nir);
379
380 if (lima_debug & LIMA_DEBUG_GP)
381 nir_print_shader(nir, stdout);
382
383 if (!gpir_compile_nir(so, nir, &ctx->debug)) {
384 ralloc_free(so);
385 return NULL;
386 }
387
388 ralloc_free(nir);
389
390 return so;
391 }
392
393 static void
394 lima_bind_vs_state(struct pipe_context *pctx, void *hwcso)
395 {
396 struct lima_context *ctx = lima_context(pctx);
397
398 ctx->vs = hwcso;
399 ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_VERT;
400 }
401
402 static void
403 lima_delete_vs_state(struct pipe_context *pctx, void *hwcso)
404 {
405 struct lima_vs_shader_state *so = hwcso;
406
407 if (so->bo)
408 lima_bo_unreference(so->bo);
409
410 ralloc_free(so);
411 }
412
413 void
414 lima_program_init(struct lima_context *ctx)
415 {
416 ctx->base.create_fs_state = lima_create_fs_state;
417 ctx->base.bind_fs_state = lima_bind_fs_state;
418 ctx->base.delete_fs_state = lima_delete_fs_state;
419
420 ctx->base.create_vs_state = lima_create_vs_state;
421 ctx->base.bind_vs_state = lima_bind_vs_state;
422 ctx->base.delete_vs_state = lima_delete_vs_state;
423 }