nir/opt_vectorize: Add a callback for filtering of vectorizing.
[mesa.git] / src / gallium / drivers / lima / lima_program.c
1 /*
2 * Copyright (c) 2017-2019 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/u_memory.h"
26 #include "util/ralloc.h"
27 #include "util/u_debug.h"
28
29 #include "tgsi/tgsi_dump.h"
30 #include "compiler/nir/nir.h"
31 #include "nir/tgsi_to_nir.h"
32
33 #include "pipe/p_state.h"
34
35 #include "lima_screen.h"
36 #include "lima_context.h"
37 #include "lima_job.h"
38 #include "lima_program.h"
39 #include "lima_bo.h"
40 #include "lima_format.h"
41
42 #include "ir/lima_ir.h"
43
44 static const nir_shader_compiler_options vs_nir_options = {
45 .lower_ffma = true,
46 .lower_fpow = true,
47 .lower_ffract = true,
48 .lower_fdiv = true,
49 .lower_fmod = true,
50 .lower_fsqrt = true,
51 .lower_sub = true,
52 .lower_flrp32 = true,
53 .lower_flrp64 = true,
54 /* could be implemented by clamp */
55 .lower_fsat = true,
56 .lower_bitops = true,
57 .lower_rotate = true,
58 .lower_sincos = true,
59 .lower_fceil = true,
60 };
61
62 static const nir_shader_compiler_options fs_nir_options = {
63 .lower_ffma = true,
64 .lower_fpow = true,
65 .lower_fdiv = true,
66 .lower_fmod = true,
67 .lower_sub = true,
68 .lower_flrp32 = true,
69 .lower_flrp64 = true,
70 .lower_fsign = true,
71 .lower_rotate = true,
72 .lower_fdot = true,
73 .lower_fdph = true,
74 .lower_bitops = true,
75 .lower_vector_cmp = true,
76 };
77
78 const void *
79 lima_program_get_compiler_options(enum pipe_shader_type shader)
80 {
81 switch (shader) {
82 case PIPE_SHADER_VERTEX:
83 return &vs_nir_options;
84 case PIPE_SHADER_FRAGMENT:
85 return &fs_nir_options;
86 default:
87 return NULL;
88 }
89 }
90
/* Size callback handed to nir_lower_io by the optimize functions in this
 * file: reports the attribute slot count of a GLSL type.
 */
static int
type_size(const struct glsl_type *type, bool bindless)
{
   /* The bindless flag does not influence the result. */
   (void)bindless;

   return glsl_count_attribute_slots(type, false);
}
96
97 void
98 lima_program_optimize_vs_nir(struct nir_shader *s)
99 {
100 bool progress;
101
102 NIR_PASS_V(s, nir_lower_viewport_transform);
103 NIR_PASS_V(s, nir_lower_point_size, 1.0f, 100.0f);
104 NIR_PASS_V(s, nir_lower_io,
105 nir_var_shader_in | nir_var_shader_out, type_size, 0);
106 NIR_PASS_V(s, nir_lower_load_const_to_scalar);
107 NIR_PASS_V(s, lima_nir_lower_uniform_to_scalar);
108 NIR_PASS_V(s, nir_lower_io_to_scalar,
109 nir_var_shader_in|nir_var_shader_out);
110
111 do {
112 progress = false;
113
114 NIR_PASS_V(s, nir_lower_vars_to_ssa);
115 NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
116 NIR_PASS(progress, s, nir_lower_phis_to_scalar);
117 NIR_PASS(progress, s, nir_copy_prop);
118 NIR_PASS(progress, s, nir_opt_remove_phis);
119 NIR_PASS(progress, s, nir_opt_dce);
120 NIR_PASS(progress, s, nir_opt_dead_cf);
121 NIR_PASS(progress, s, nir_opt_cse);
122 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
123 NIR_PASS(progress, s, nir_opt_algebraic);
124 NIR_PASS(progress, s, lima_nir_lower_ftrunc);
125 NIR_PASS(progress, s, nir_opt_constant_folding);
126 NIR_PASS(progress, s, nir_opt_undef);
127 NIR_PASS(progress, s, nir_opt_loop_unroll,
128 nir_var_shader_in |
129 nir_var_shader_out |
130 nir_var_function_temp);
131 } while (progress);
132
133 NIR_PASS_V(s, nir_lower_int_to_float);
134 /* int_to_float pass generates ftrunc, so lower it */
135 NIR_PASS(progress, s, lima_nir_lower_ftrunc);
136 NIR_PASS_V(s, nir_lower_bool_to_float);
137
138 NIR_PASS_V(s, nir_copy_prop);
139 NIR_PASS_V(s, nir_opt_dce);
140 NIR_PASS_V(s, nir_lower_locals_to_regs);
141 NIR_PASS_V(s, nir_convert_from_ssa, true);
142 NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
143 nir_sweep(s);
144 }
145
146 static bool
147 lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
148 {
149 if (instr->type != nir_instr_type_alu)
150 return false;
151
152 nir_alu_instr *alu = nir_instr_as_alu(instr);
153 switch (alu->op) {
154 case nir_op_frcp:
155 case nir_op_frsq:
156 case nir_op_flog2:
157 case nir_op_fexp2:
158 case nir_op_fsqrt:
159 case nir_op_fsin:
160 case nir_op_fcos:
161 return true;
162 default:
163 break;
164 }
165
166 /* nir vec4 fcsel assumes that each component of the condition will be
167 * used to select the same component from the two options, but Utgard PP
168 * has only 1 component condition. If all condition components are not the
169 * same we need to lower it to scalar.
170 */
171 switch (alu->op) {
172 case nir_op_bcsel:
173 case nir_op_fcsel:
174 break;
175 default:
176 return false;
177 }
178
179 int num_components = nir_dest_num_components(alu->dest.dest);
180
181 uint8_t swizzle = alu->src[0].swizzle[0];
182
183 for (int i = 1; i < num_components; i++)
184 if (alu->src[0].swizzle[i] != swizzle)
185 return true;
186
187 return false;
188 }
189
190 void
191 lima_program_optimize_fs_nir(struct nir_shader *s,
192 struct nir_lower_tex_options *tex_options)
193 {
194 bool progress;
195
196 NIR_PASS_V(s, nir_lower_fragcoord_wtrans);
197 NIR_PASS_V(s, nir_lower_io,
198 nir_var_shader_in | nir_var_shader_out, type_size, 0);
199 NIR_PASS_V(s, nir_lower_regs_to_ssa);
200 NIR_PASS_V(s, nir_lower_tex, tex_options);
201
202 do {
203 progress = false;
204 NIR_PASS(progress, s, nir_opt_vectorize, NULL, NULL);
205 } while (progress);
206
207 do {
208 progress = false;
209
210 NIR_PASS_V(s, nir_lower_vars_to_ssa);
211 NIR_PASS(progress, s, nir_lower_alu_to_scalar, lima_alu_to_scalar_filter_cb, NULL);
212 NIR_PASS(progress, s, nir_copy_prop);
213 NIR_PASS(progress, s, nir_opt_remove_phis);
214 NIR_PASS(progress, s, nir_opt_dce);
215 NIR_PASS(progress, s, nir_opt_dead_cf);
216 NIR_PASS(progress, s, nir_opt_cse);
217 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
218 NIR_PASS(progress, s, nir_opt_algebraic);
219 NIR_PASS(progress, s, nir_opt_constant_folding);
220 NIR_PASS(progress, s, nir_opt_undef);
221 NIR_PASS(progress, s, nir_opt_loop_unroll,
222 nir_var_shader_in |
223 nir_var_shader_out |
224 nir_var_function_temp);
225 NIR_PASS(progress, s, lima_nir_split_load_input);
226 } while (progress);
227
228 NIR_PASS_V(s, nir_lower_int_to_float);
229 NIR_PASS_V(s, nir_lower_bool_to_float);
230
231 /* Some ops must be lowered after being converted from int ops,
232 * so re-run nir_opt_algebraic after int lowering. */
233 do {
234 progress = false;
235 NIR_PASS(progress, s, nir_opt_algebraic);
236 } while (progress);
237
238 /* Must be run after optimization loop */
239 NIR_PASS_V(s, lima_nir_scale_trig);
240
241 /* Lower modifiers */
242 NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_all_source_mods);
243 NIR_PASS_V(s, nir_copy_prop);
244 NIR_PASS_V(s, nir_opt_dce);
245
246 NIR_PASS_V(s, nir_lower_locals_to_regs);
247 NIR_PASS_V(s, nir_convert_from_ssa, true);
248 NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
249
250 NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
251 NIR_PASS_V(s, nir_lower_vec_to_movs);
252
253 NIR_PASS_V(s, lima_nir_duplicate_load_uniforms);
254 NIR_PASS_V(s, lima_nir_duplicate_load_inputs);
255 NIR_PASS_V(s, lima_nir_duplicate_load_consts);
256
257 nir_sweep(s);
258 }
259
260 static bool
261 lima_fs_compile_shader(struct lima_context *ctx,
262 struct lima_fs_shader_state *fs,
263 struct nir_lower_tex_options *tex_options)
264 {
265 struct lima_screen *screen = lima_screen(ctx->base.screen);
266 nir_shader *nir = nir_shader_clone(fs, fs->base.ir.nir);
267
268 lima_program_optimize_fs_nir(nir, tex_options);
269
270 if (lima_debug & LIMA_DEBUG_PP)
271 nir_print_shader(nir, stdout);
272
273 if (!ppir_compile_nir(fs, nir, screen->pp_ra, &ctx->debug)) {
274 ralloc_free(nir);
275 return false;
276 }
277
278 fs->uses_discard = nir->info.fs.uses_discard;
279 ralloc_free(nir);
280
281 return true;
282 }
283
284 static void *
285 lima_create_fs_state(struct pipe_context *pctx,
286 const struct pipe_shader_state *cso)
287 {
288 struct lima_context *ctx = lima_context(pctx);
289 struct lima_fs_shader_state *so = rzalloc(NULL, struct lima_fs_shader_state);
290
291 if (!so)
292 return NULL;
293
294 nir_shader *nir;
295 if (cso->type == PIPE_SHADER_IR_NIR)
296 /* The backend takes ownership of the NIR shader on state
297 * creation.
298 */
299 nir = cso->ir.nir;
300 else {
301 assert(cso->type == PIPE_SHADER_IR_TGSI);
302
303 nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
304 }
305
306 so->base.type = PIPE_SHADER_IR_NIR;
307 so->base.ir.nir = nir;
308
309 uint8_t identity[4] = { PIPE_SWIZZLE_X,
310 PIPE_SWIZZLE_Y,
311 PIPE_SWIZZLE_Z,
312 PIPE_SWIZZLE_W };
313
314 struct nir_lower_tex_options tex_options = {
315 .lower_txp = ~0u,
316 .swizzle_result = 0,
317 };
318
319 /* Initialize with identity swizzles. That should suffice for most shaders */
320 for (int i = 0; i < PIPE_MAX_SAMPLERS; i++)
321 memcpy(so->swizzles[i], identity, 4);
322
323 if (!lima_fs_compile_shader(ctx, so, &tex_options)) {
324 ralloc_free(so);
325 return NULL;
326 }
327
328 return so;
329 }
330
331 static void
332 lima_bind_fs_state(struct pipe_context *pctx, void *hwcso)
333 {
334 struct lima_context *ctx = lima_context(pctx);
335
336 ctx->fs = hwcso;
337 ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_FRAG;
338 }
339
340 static void
341 lima_delete_fs_state(struct pipe_context *pctx, void *hwcso)
342 {
343 struct lima_fs_shader_state *so = hwcso;
344
345 if (so->bo)
346 lima_bo_unreference(so->bo);
347
348 ralloc_free(so->base.ir.nir);
349 ralloc_free(so);
350 }
351
352 bool
353 lima_update_vs_state(struct lima_context *ctx)
354 {
355 struct lima_vs_shader_state *vs = ctx->vs;
356 if (!vs->bo) {
357 struct lima_screen *screen = lima_screen(ctx->base.screen);
358 vs->bo = lima_bo_create(screen, vs->shader_size, 0);
359 if (!vs->bo) {
360 fprintf(stderr, "lima: create vs shader bo fail\n");
361 return false;
362 }
363
364 memcpy(lima_bo_map(vs->bo), vs->shader, vs->shader_size);
365 ralloc_free(vs->shader);
366 vs->shader = NULL;
367 }
368
369 return true;
370 }
371
372 bool
373 lima_update_fs_state(struct lima_context *ctx)
374 {
375 struct lima_fs_shader_state *fs = ctx->fs;
376 struct lima_texture_stateobj *lima_tex = &ctx->tex_stateobj;
377 struct nir_lower_tex_options tex_options = {
378 .lower_txp = ~0u,
379 .swizzle_result = 0,
380 };
381 bool needs_recompile = false;
382
383 /* Check if texture formats has changed since last compilation.
384 * If it has we need to recompile shader.
385 */
386 if (((ctx->dirty & LIMA_CONTEXT_DIRTY_TEXTURES) &&
387 lima_tex->num_samplers &&
388 lima_tex->num_textures)) {
389 uint8_t identity[4] = { PIPE_SWIZZLE_X,
390 PIPE_SWIZZLE_Y,
391 PIPE_SWIZZLE_Z,
392 PIPE_SWIZZLE_W };
393 for (int i = 0; i < lima_tex->num_samplers; i++) {
394 struct lima_sampler_view *texture = lima_sampler_view(lima_tex->textures[i]);
395 struct pipe_resource *prsc = texture->base.texture;
396 const uint8_t *swizzle = lima_format_get_texel_swizzle(prsc->format);
397 if (memcmp(fs->swizzles[i], swizzle, 4)) {
398 needs_recompile = true;
399 memcpy(fs->swizzles[i], swizzle, 4);
400 }
401
402 for (int j = 0; j < 4; j++)
403 tex_options.swizzles[i][j] = swizzle[j];
404
405 if (memcmp(swizzle, identity, 4))
406 tex_options.swizzle_result |= (1 << i);
407 }
408
409 /* Fill rest with identity swizzle */
410 for (int i = lima_tex->num_samplers; i < PIPE_MAX_SAMPLERS; i++)
411 memcpy(fs->swizzles[i], identity, 4);
412 }
413
414 if (needs_recompile) {
415 if (fs->bo) {
416 lima_bo_unreference(fs->bo);
417 fs->bo = NULL;
418 }
419
420 if (!lima_fs_compile_shader(ctx, fs, &tex_options))
421 return false;
422 }
423
424 if (!fs->bo) {
425 struct lima_screen *screen = lima_screen(ctx->base.screen);
426 fs->bo = lima_bo_create(screen, fs->shader_size, 0);
427 if (!fs->bo) {
428 fprintf(stderr, "lima: create fs shader bo fail\n");
429 return false;
430 }
431
432 memcpy(lima_bo_map(fs->bo), fs->shader, fs->shader_size);
433 ralloc_free(fs->shader);
434 fs->shader = NULL;
435 }
436
437 struct lima_job *job = lima_job_get(ctx);
438 job->pp_max_stack_size = MAX2(job->pp_max_stack_size, ctx->fs->stack_size);
439
440 return true;
441 }
442
443 static void *
444 lima_create_vs_state(struct pipe_context *pctx,
445 const struct pipe_shader_state *cso)
446 {
447 struct lima_context *ctx = lima_context(pctx);
448 struct lima_vs_shader_state *so = rzalloc(NULL, struct lima_vs_shader_state);
449
450 if (!so)
451 return NULL;
452
453 nir_shader *nir;
454 if (cso->type == PIPE_SHADER_IR_NIR)
455 nir = cso->ir.nir;
456 else {
457 assert(cso->type == PIPE_SHADER_IR_TGSI);
458
459 nir = tgsi_to_nir(cso->tokens, pctx->screen, false);
460 }
461
462 lima_program_optimize_vs_nir(nir);
463
464 if (lima_debug & LIMA_DEBUG_GP)
465 nir_print_shader(nir, stdout);
466
467 if (!gpir_compile_nir(so, nir, &ctx->debug)) {
468 ralloc_free(so);
469 return NULL;
470 }
471
472 ralloc_free(nir);
473
474 return so;
475 }
476
477 static void
478 lima_bind_vs_state(struct pipe_context *pctx, void *hwcso)
479 {
480 struct lima_context *ctx = lima_context(pctx);
481
482 ctx->vs = hwcso;
483 ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_VERT;
484 }
485
486 static void
487 lima_delete_vs_state(struct pipe_context *pctx, void *hwcso)
488 {
489 struct lima_vs_shader_state *so = hwcso;
490
491 if (so->bo)
492 lima_bo_unreference(so->bo);
493
494 ralloc_free(so);
495 }
496
497 void
498 lima_program_init(struct lima_context *ctx)
499 {
500 ctx->base.create_fs_state = lima_create_fs_state;
501 ctx->base.bind_fs_state = lima_bind_fs_state;
502 ctx->base.delete_fs_state = lima_delete_fs_state;
503
504 ctx->base.create_vs_state = lima_create_vs_state;
505 ctx->base.bind_vs_state = lima_bind_vs_state;
506 ctx->base.delete_vs_state = lima_delete_vs_state;
507 }