lima/ppir: enable vectorize optimization
[mesa.git] / src / gallium / drivers / lima / lima_program.c
1 /*
2 * Copyright (c) 2017-2019 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/u_memory.h"
26 #include "util/ralloc.h"
27 #include "util/u_debug.h"
28
29 #include "tgsi/tgsi_dump.h"
30 #include "compiler/nir/nir.h"
31 #include "nir/tgsi_to_nir.h"
32
33 #include "pipe/p_state.h"
34
35 #include "lima_screen.h"
36 #include "lima_context.h"
37 #include "lima_program.h"
38 #include "lima_bo.h"
39 #include "ir/lima_ir.h"
40
41 static const nir_shader_compiler_options vs_nir_options = {
42 .lower_ffma = true,
43 .lower_fpow = true,
44 .lower_ffract = true,
45 .lower_fdiv = true,
46 .lower_fmod = true,
47 .lower_fsqrt = true,
48 .lower_sub = true,
49 .lower_flrp32 = true,
50 .lower_flrp64 = true,
51 .lower_ftrunc = true,
52 /* could be implemented by clamp */
53 .lower_fsat = true,
54 .lower_bitops = true,
55 .lower_rotate = true,
56 .lower_sincos = true,
57 };
58
59 static const nir_shader_compiler_options fs_nir_options = {
60 .lower_ffma = true,
61 .lower_fpow = true,
62 .lower_fdiv = true,
63 .lower_fmod = true,
64 .lower_sub = true,
65 .lower_flrp32 = true,
66 .lower_flrp64 = true,
67 .lower_fsign = true,
68 .lower_rotate = true,
69 .lower_fdot = true,
70 .lower_bitops = true,
71 .lower_vector_cmp = true,
72 };
73
74 static const struct nir_lower_tex_options tex_options = {
75 .lower_txp = ~0u,
76 };
77
78 const void *
79 lima_program_get_compiler_options(enum pipe_shader_type shader)
80 {
81 switch (shader) {
82 case PIPE_SHADER_VERTEX:
83 return &vs_nir_options;
84 case PIPE_SHADER_FRAGMENT:
85 return &fs_nir_options;
86 default:
87 return NULL;
88 }
89 }
90
/* Size callback handed to nir_lower_io(): reports a type's size as its
 * attribute slot count. The bindless flag is part of the callback
 * signature but is not consulted here. */
static int
type_size(const struct glsl_type *type, bool bindless)
{
   (void)bindless;

   int slots = glsl_count_attribute_slots(type, false);
   return slots;
}
96
97 void
98 lima_program_optimize_vs_nir(struct nir_shader *s)
99 {
100 bool progress;
101
102 NIR_PASS_V(s, nir_lower_viewport_transform);
103 NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size, 0);
104 NIR_PASS_V(s, nir_lower_load_const_to_scalar);
105 NIR_PASS_V(s, lima_nir_lower_uniform_to_scalar);
106 NIR_PASS_V(s, nir_lower_io_to_scalar,
107 nir_var_shader_in|nir_var_shader_out);
108
109 do {
110 progress = false;
111
112 NIR_PASS_V(s, nir_lower_vars_to_ssa);
113 NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL);
114 NIR_PASS(progress, s, nir_lower_phis_to_scalar);
115 NIR_PASS(progress, s, nir_copy_prop);
116 NIR_PASS(progress, s, nir_opt_remove_phis);
117 NIR_PASS(progress, s, nir_opt_dce);
118 NIR_PASS(progress, s, nir_opt_dead_cf);
119 NIR_PASS(progress, s, nir_opt_cse);
120 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
121 NIR_PASS(progress, s, nir_opt_algebraic);
122 NIR_PASS(progress, s, nir_opt_constant_folding);
123 NIR_PASS(progress, s, nir_opt_undef);
124 NIR_PASS(progress, s, nir_opt_loop_unroll,
125 nir_var_shader_in |
126 nir_var_shader_out |
127 nir_var_function_temp);
128 } while (progress);
129
130 NIR_PASS_V(s, nir_lower_int_to_float);
131 NIR_PASS_V(s, nir_lower_bool_to_float);
132
133 /* Some ops must be lowered after being converted from int ops,
134 * so re-run nir_opt_algebraic after int lowering. */
135 do {
136 progress = false;
137 NIR_PASS(progress, s, nir_opt_algebraic);
138 } while (progress);
139
140 NIR_PASS_V(s, nir_copy_prop);
141 NIR_PASS_V(s, nir_opt_dce);
142 NIR_PASS_V(s, nir_lower_locals_to_regs);
143 NIR_PASS_V(s, nir_convert_from_ssa, true);
144 NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp);
145 nir_sweep(s);
146 }
147
148 void
149 lima_program_optimize_fs_nir(struct nir_shader *s)
150 {
151 BITSET_DECLARE(alu_lower, nir_num_opcodes) = {0};
152 bool progress;
153
154 BITSET_SET(alu_lower, nir_op_frcp);
155 BITSET_SET(alu_lower, nir_op_frsq);
156 BITSET_SET(alu_lower, nir_op_flog2);
157 BITSET_SET(alu_lower, nir_op_fexp2);
158 BITSET_SET(alu_lower, nir_op_fsqrt);
159 BITSET_SET(alu_lower, nir_op_fsin);
160 BITSET_SET(alu_lower, nir_op_fcos);
161 /* nir vec4 fcsel assumes that each component of the condition will be
162 * used to select the same component from the two options, but lima
163 * can't implement that since we only have 1 component condition */
164 BITSET_SET(alu_lower, nir_op_fcsel);
165 BITSET_SET(alu_lower, nir_op_bcsel);
166
167 NIR_PASS_V(s, nir_lower_fragcoord_wtrans);
168 NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size, 0);
169 NIR_PASS_V(s, nir_lower_regs_to_ssa);
170 NIR_PASS_V(s, nir_lower_tex, &tex_options);
171
172 do {
173 progress = false;
174 NIR_PASS(progress, s, nir_opt_vectorize);
175 } while (progress);
176
177 do {
178 progress = false;
179
180 NIR_PASS_V(s, nir_lower_vars_to_ssa);
181 NIR_PASS(progress, s, nir_lower_alu_to_scalar, alu_lower);
182 NIR_PASS(progress, s, nir_lower_phis_to_scalar);
183 NIR_PASS(progress, s, nir_copy_prop);
184 NIR_PASS(progress, s, nir_opt_remove_phis);
185 NIR_PASS(progress, s, nir_opt_dce);
186 NIR_PASS(progress, s, nir_opt_dead_cf);
187 NIR_PASS(progress, s, nir_opt_cse);
188 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
189 NIR_PASS(progress, s, nir_opt_algebraic);
190 NIR_PASS(progress, s, nir_opt_constant_folding);
191 NIR_PASS(progress, s, nir_opt_undef);
192 NIR_PASS(progress, s, nir_opt_loop_unroll,
193 nir_var_shader_in |
194 nir_var_shader_out |
195 nir_var_function_temp);
196 } while (progress);
197
198 NIR_PASS_V(s, nir_lower_int_to_float);
199 NIR_PASS_V(s, nir_lower_bool_to_float);
200
201 /* Some ops must be lowered after being converted from int ops,
202 * so re-run nir_opt_algebraic after int lowering. */
203 do {
204 progress = false;
205 NIR_PASS(progress, s, nir_opt_algebraic);
206 } while (progress);
207
208 /* Must be run after optimization loop */
209 NIR_PASS_V(s, lima_nir_scale_trig);
210
211 /* Lower modifiers */
212 NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_all_source_mods);
213 NIR_PASS_V(s, nir_copy_prop);
214 NIR_PASS_V(s, nir_opt_dce);
215
216 NIR_PASS_V(s, nir_lower_locals_to_regs);
217 NIR_PASS_V(s, nir_convert_from_ssa, true);
218 NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp);
219
220 NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
221 NIR_PASS_V(s, nir_lower_vec_to_movs);
222
223 nir_sweep(s);
224 }
225
226 static void *
227 lima_create_fs_state(struct pipe_context *pctx,
228 const struct pipe_shader_state *cso)
229 {
230 struct lima_context *ctx = lima_context(pctx);
231 struct lima_screen *screen = lima_screen(pctx->screen);
232 struct lima_fs_shader_state *so = rzalloc(NULL, struct lima_fs_shader_state);
233
234 if (!so)
235 return NULL;
236
237 nir_shader *nir;
238 if (cso->type == PIPE_SHADER_IR_NIR)
239 nir = cso->ir.nir;
240 else {
241 assert(cso->type == PIPE_SHADER_IR_TGSI);
242
243 nir = tgsi_to_nir(cso->tokens, pctx->screen);
244 }
245
246 lima_program_optimize_fs_nir(nir);
247
248 if (lima_debug & LIMA_DEBUG_PP)
249 nir_print_shader(nir, stdout);
250
251 if (!ppir_compile_nir(so, nir, screen->pp_ra, &ctx->debug)) {
252 ralloc_free(so);
253 return NULL;
254 }
255
256 return so;
257 }
258
259 static void
260 lima_bind_fs_state(struct pipe_context *pctx, void *hwcso)
261 {
262 struct lima_context *ctx = lima_context(pctx);
263
264 ctx->fs = hwcso;
265 ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_FRAG;
266 }
267
268 static void
269 lima_delete_fs_state(struct pipe_context *pctx, void *hwcso)
270 {
271 struct lima_fs_shader_state *so = hwcso;
272
273 if (so->bo)
274 lima_bo_free(so->bo);
275
276 ralloc_free(so);
277 }
278
279 bool
280 lima_update_vs_state(struct lima_context *ctx)
281 {
282 struct lima_vs_shader_state *vs = ctx->vs;
283 if (!vs->bo) {
284 struct lima_screen *screen = lima_screen(ctx->base.screen);
285 vs->bo = lima_bo_create(screen, vs->shader_size, 0);
286 if (!vs->bo) {
287 fprintf(stderr, "lima: create vs shader bo fail\n");
288 return false;
289 }
290
291 memcpy(lima_bo_map(vs->bo), vs->shader, vs->shader_size);
292 ralloc_free(vs->shader);
293 vs->shader = NULL;
294 }
295
296 return true;
297 }
298
299 bool
300 lima_update_fs_state(struct lima_context *ctx)
301 {
302 struct lima_fs_shader_state *fs = ctx->fs;
303 if (!fs->bo) {
304 struct lima_screen *screen = lima_screen(ctx->base.screen);
305 fs->bo = lima_bo_create(screen, fs->shader_size, 0);
306 if (!fs->bo) {
307 fprintf(stderr, "lima: create fs shader bo fail\n");
308 return false;
309 }
310
311 memcpy(lima_bo_map(fs->bo), fs->shader, fs->shader_size);
312 ralloc_free(fs->shader);
313 fs->shader = NULL;
314 }
315
316 ctx->pp_max_stack_size = MAX2(ctx->pp_max_stack_size, ctx->fs->stack_size);
317
318 return true;
319 }
320
321 static void *
322 lima_create_vs_state(struct pipe_context *pctx,
323 const struct pipe_shader_state *cso)
324 {
325 struct lima_context *ctx = lima_context(pctx);
326 struct lima_vs_shader_state *so = rzalloc(NULL, struct lima_vs_shader_state);
327
328 if (!so)
329 return NULL;
330
331 nir_shader *nir;
332 if (cso->type == PIPE_SHADER_IR_NIR)
333 nir = cso->ir.nir;
334 else {
335 assert(cso->type == PIPE_SHADER_IR_TGSI);
336
337 nir = tgsi_to_nir(cso->tokens, pctx->screen);
338 }
339
340 lima_program_optimize_vs_nir(nir);
341
342 if (lima_debug & LIMA_DEBUG_GP)
343 nir_print_shader(nir, stdout);
344
345 if (!gpir_compile_nir(so, nir, &ctx->debug)) {
346 ralloc_free(so);
347 return NULL;
348 }
349
350 return so;
351 }
352
353 static void
354 lima_bind_vs_state(struct pipe_context *pctx, void *hwcso)
355 {
356 struct lima_context *ctx = lima_context(pctx);
357
358 ctx->vs = hwcso;
359 ctx->dirty |= LIMA_CONTEXT_DIRTY_SHADER_VERT;
360 }
361
362 static void
363 lima_delete_vs_state(struct pipe_context *pctx, void *hwcso)
364 {
365 struct lima_vs_shader_state *so = hwcso;
366
367 if (so->bo)
368 lima_bo_free(so->bo);
369
370 ralloc_free(so);
371 }
372
373 void
374 lima_program_init(struct lima_context *ctx)
375 {
376 ctx->base.create_fs_state = lima_create_fs_state;
377 ctx->base.bind_fs_state = lima_bind_fs_state;
378 ctx->base.delete_fs_state = lima_delete_fs_state;
379
380 ctx->base.create_vs_state = lima_create_vs_state;
381 ctx->base.bind_vs_state = lima_bind_vs_state;
382 ctx->base.delete_vs_state = lima_delete_vs_state;
383 }