2 * Copyright (c) 2017-2019 Lima Project
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 #include "util/u_memory.h"
26 #include "util/ralloc.h"
27 #include "util/u_debug.h"
29 #include "tgsi/tgsi_dump.h"
30 #include "compiler/nir/nir.h"
31 #include "nir/tgsi_to_nir.h"
33 #include "pipe/p_state.h"
35 #include "lima_screen.h"
36 #include "lima_context.h"
38 #include "lima_program.h"
40 #include "lima_format.h"
42 #include "ir/lima_ir.h"
/* NIR compiler options for vertex shaders; lima_program_get_compiler_options
 * returns a pointer to this table for PIPE_SHADER_VERTEX.
 * NOTE(review): apart from the comment below, the initializer fields are not
 * visible in this listing -- confirm them against the full file. */
44 static const nir_shader_compiler_options vs_nir_options
= {
54 /* could be implemented by clamp */
/* NIR compiler options for fragment shaders; lima_program_get_compiler_options
 * returns a pointer to this table for PIPE_SHADER_FRAGMENT.
 * The only field visible here is lower_vector_cmp, which is enabled.
 * NOTE(review): the remaining initializer fields are not visible in this
 * listing -- confirm them against the full file. */
62 static const nir_shader_compiler_options fs_nir_options
= {
75 .lower_vector_cmp
= true,
/* Select the NIR compiler options table for a shader stage.
 *
 * shader: the pipe shader stage being queried.
 *
 * Returns &vs_nir_options for PIPE_SHADER_VERTEX and &fs_nir_options for
 * PIPE_SHADER_FRAGMENT.
 * NOTE(review): the return type and the handling of any other stage are not
 * visible in this listing -- confirm against the full file. */
79 lima_program_get_compiler_options(enum pipe_shader_type shader
)
82 case PIPE_SHADER_VERTEX
:
83 return &vs_nir_options
;
84 case PIPE_SHADER_FRAGMENT
:
85 return &fs_nir_options
;
/* Size callback handed to nir_lower_io by both optimize functions in this
 * file.
 *
 * type:     GLSL type to measure.
 * bindless: not used by the visible statement.
 *
 * Returns glsl_count_attribute_slots(type, false), i.e. the size is counted
 * in attribute slots. */
92 type_size(const struct glsl_type
*type
, bool bindless
)
94 return glsl_count_attribute_slots(type
, false);
/* Lower and optimize a vertex shader's NIR; lima_create_vs_state calls this
 * right before gpir_compile_nir.
 *
 * s: the shader to transform in place.
 *
 * The visible passes run in this order:
 *  - viewport-transform and point-size lowering (point size is passed 1.0f
 *    and 100.0f, presumably the clamp range -- confirm),
 *  - nir_lower_io on shader inputs/outputs using type_size, then
 *    scalarization of load_const, uniform loads and input/output IO,
 *  - nir_lower_vars_to_ssa followed by a group of NIR_PASS(progress, ...)
 *    passes (ALU/phi scalarization, copy propagation, phi removal, DCE,
 *    dead control flow, CSE, peephole select, algebraic, ftrunc lowering,
 *    constant folding, undef, loop unrolling),
 *  - int-to-float lowering, a second ftrunc lowering (see the comment at
 *    that call), bool-to-float lowering,
 *  - copy propagation, DCE, locals-to-regs, conversion out of SSA and
 *    removal of dead function-temp variables.
 *
 * NOTE(review): this listing omits some original lines (return type,
 * braces, the `progress` declaration and the control flow around the
 * NIR_PASS group); presumably the group repeats while any pass reports
 * progress -- confirm against the full file. */
98 lima_program_optimize_vs_nir(struct nir_shader
*s
)
102 NIR_PASS_V(s
, nir_lower_viewport_transform
);
103 NIR_PASS_V(s
, nir_lower_point_size
, 1.0f
, 100.0f
);
104 NIR_PASS_V(s
, nir_lower_io
,
105 nir_var_shader_in
| nir_var_shader_out
, type_size
, 0);
106 NIR_PASS_V(s
, nir_lower_load_const_to_scalar
);
107 NIR_PASS_V(s
, lima_nir_lower_uniform_to_scalar
);
108 NIR_PASS_V(s
, nir_lower_io_to_scalar
,
109 nir_var_shader_in
|nir_var_shader_out
);
114 NIR_PASS_V(s
, nir_lower_vars_to_ssa
);
115 NIR_PASS(progress
, s
, nir_lower_alu_to_scalar
, NULL
, NULL
);
116 NIR_PASS(progress
, s
, nir_lower_phis_to_scalar
);
117 NIR_PASS(progress
, s
, nir_copy_prop
);
118 NIR_PASS(progress
, s
, nir_opt_remove_phis
);
119 NIR_PASS(progress
, s
, nir_opt_dce
);
120 NIR_PASS(progress
, s
, nir_opt_dead_cf
);
121 NIR_PASS(progress
, s
, nir_opt_cse
);
122 NIR_PASS(progress
, s
, nir_opt_peephole_select
, 8, true, true);
123 NIR_PASS(progress
, s
, nir_opt_algebraic
);
124 NIR_PASS(progress
, s
, lima_nir_lower_ftrunc
);
125 NIR_PASS(progress
, s
, nir_opt_constant_folding
);
126 NIR_PASS(progress
, s
, nir_opt_undef
);
127 NIR_PASS(progress
, s
, nir_opt_loop_unroll
,
130 nir_var_function_temp
);
133 NIR_PASS_V(s
, nir_lower_int_to_float
);
134 /* int_to_float pass generates ftrunc, so lower it */
135 NIR_PASS(progress
, s
, lima_nir_lower_ftrunc
);
136 NIR_PASS_V(s
, nir_lower_bool_to_float
);
138 NIR_PASS_V(s
, nir_copy_prop
);
139 NIR_PASS_V(s
, nir_opt_dce
);
140 NIR_PASS_V(s
, nir_lower_locals_to_regs
);
141 NIR_PASS_V(s
, nir_convert_from_ssa
, true);
142 NIR_PASS_V(s
, nir_remove_dead_variables
, nir_var_function_temp
, NULL
);
/* Filter callback passed to nir_lower_alu_to_scalar by
 * lima_program_optimize_fs_nir.
 *
 * instr: instruction under consideration; anything whose type is not
 *        nir_instr_type_alu is handled by the first check.
 * data:  not used by the visible statements.
 *
 * For the case described in the comment below, the visible code takes the
 * destination component count and compares the first swizzle entry of
 * src[0] against the entries for the remaining components.
 *
 * NOTE(review): the opcode checks and every return statement are missing
 * from this listing; presumably a true result asks for the instruction to
 * be scalarized -- confirm against the full file. */
147 lima_alu_to_scalar_filter_cb(const nir_instr
*instr
, const void *data
)
149 if (instr
->type
!= nir_instr_type_alu
)
152 nir_alu_instr
*alu
= nir_instr_as_alu(instr
);
166 /* nir vec4 fcsel assumes that each component of the condition will be
167 * used to select the same component from the two options, but Utgard PP
168 * has only 1 component condition. If all condition components are not the
169 * same we need to lower it to scalar.
179 int num_components
= nir_dest_num_components(alu
->dest
.dest
);
181 uint8_t swizzle
= alu
->src
[0].swizzle
[0];
183 for (int i
= 1; i
< num_components
; i
++)
184 if (alu
->src
[0].swizzle
[i
] != swizzle
)
// Lower and optimize a fragment shader NIR; lima_fs_compile_shader calls
// this right before ppir_compile_nir.
//
// s:           the shader to transform in place.
// tex_options: forwarded unchanged to nir_lower_tex.
//
// The visible passes run in this order:
//  - nir_lower_fragcoord_wtrans, nir_lower_io on shader inputs/outputs
//    using type_size, nir_lower_regs_to_ssa, nir_lower_tex,
//  - nir_opt_vectorize,
//  - nir_lower_vars_to_ssa followed by a group of NIR_PASS(progress, ...)
//    passes: ALU scalarization filtered by lima_alu_to_scalar_filter_cb,
//    copy propagation, phi removal, DCE, dead control flow, CSE, peephole
//    select, algebraic, constant folding, undef, loop unrolling and
//    lima_nir_split_load_input,
//  - int-to-float and bool-to-float lowering, then nir_opt_algebraic again,
//  - lima_nir_scale_trig, source-modifier lowering, copy propagation, DCE,
//  - locals-to-regs, conversion out of SSA, removal of dead function-temp
//    variables, vec-to-movs lowering,
//  - duplication of uniform, input and constant loads.
//
// NOTE(review): this listing omits some original lines (return type,
// braces, the progress declaration and the control flow around the
// NIR_PASS groups); presumably the groups repeat while any pass reports
// progress -- confirm against the full file.
191 lima_program_optimize_fs_nir(struct nir_shader
*s
,
192 struct nir_lower_tex_options
*tex_options
)
196 NIR_PASS_V(s
, nir_lower_fragcoord_wtrans
);
197 NIR_PASS_V(s
, nir_lower_io
,
198 nir_var_shader_in
| nir_var_shader_out
, type_size
, 0);
199 NIR_PASS_V(s
, nir_lower_regs_to_ssa
);
200 NIR_PASS_V(s
, nir_lower_tex
, tex_options
);
204 NIR_PASS(progress
, s
, nir_opt_vectorize
, NULL
, NULL
);
210 NIR_PASS_V(s
, nir_lower_vars_to_ssa
);
211 NIR_PASS(progress
, s
, nir_lower_alu_to_scalar
, lima_alu_to_scalar_filter_cb
, NULL
);
212 NIR_PASS(progress
, s
, nir_copy_prop
);
213 NIR_PASS(progress
, s
, nir_opt_remove_phis
);
214 NIR_PASS(progress
, s
, nir_opt_dce
);
215 NIR_PASS(progress
, s
, nir_opt_dead_cf
);
216 NIR_PASS(progress
, s
, nir_opt_cse
);
217 NIR_PASS(progress
, s
, nir_opt_peephole_select
, 8, true, true);
218 NIR_PASS(progress
, s
, nir_opt_algebraic
);
219 NIR_PASS(progress
, s
, nir_opt_constant_folding
);
220 NIR_PASS(progress
, s
, nir_opt_undef
);
221 NIR_PASS(progress
, s
, nir_opt_loop_unroll
,
224 nir_var_function_temp
);
225 NIR_PASS(progress
, s
, lima_nir_split_load_input
);
228 NIR_PASS_V(s
, nir_lower_int_to_float
);
229 NIR_PASS_V(s
, nir_lower_bool_to_float
);
231 /* Some ops must be lowered after being converted from int ops,
232 * so re-run nir_opt_algebraic after int lowering. */
235 NIR_PASS(progress
, s
, nir_opt_algebraic
);
238 /* Must be run after optimization loop */
239 NIR_PASS_V(s
, lima_nir_scale_trig
);
241 /* Lower modifiers */
242 NIR_PASS_V(s
, nir_lower_to_source_mods
, nir_lower_all_source_mods
);
243 NIR_PASS_V(s
, nir_copy_prop
);
244 NIR_PASS_V(s
, nir_opt_dce
);
246 NIR_PASS_V(s
, nir_lower_locals_to_regs
);
247 NIR_PASS_V(s
, nir_convert_from_ssa
, true);
248 NIR_PASS_V(s
, nir_remove_dead_variables
, nir_var_function_temp
, NULL
);
250 NIR_PASS_V(s
, nir_move_vec_src_uses_to_dest
);
251 NIR_PASS_V(s
, nir_lower_vec_to_movs
);
253 NIR_PASS_V(s
, lima_nir_duplicate_load_uniforms
);
254 NIR_PASS_V(s
, lima_nir_duplicate_load_inputs
);
255 NIR_PASS_V(s
, lima_nir_duplicate_load_consts
);
/* Compile the fragment shader held in `fs` for the PP.
 *
 * ctx:         context supplying the screen (for screen->pp_ra) and the
 *              debug callback.
 * fs:          shader state; its stored NIR (fs->base.ir.nir) is cloned, so
 *              the stored copy is left for later recompiles.
 * tex_options: texture lowering options forwarded to
 *              lima_program_optimize_fs_nir.
 *
 * Steps visible here: clone the NIR (fs is passed as the first argument of
 * nir_shader_clone, presumably the ralloc parent -- confirm), optimize the
 * clone, print it to stdout when LIMA_DEBUG_PP is set in lima_debug, run
 * ppir_compile_nir, and copy nir->info.fs.uses_discard into
 * fs->uses_discard.
 *
 * Callers test the result with `!`, so a false/zero result means failure.
 * NOTE(review): the return type, the failure branch after ppir_compile_nir
 * and any cleanup of the clone are not visible in this listing. */
261 lima_fs_compile_shader(struct lima_context
*ctx
,
262 struct lima_fs_shader_state
*fs
,
263 struct nir_lower_tex_options
*tex_options
)
265 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
266 nir_shader
*nir
= nir_shader_clone(fs
, fs
->base
.ir
.nir
);
268 lima_program_optimize_fs_nir(nir
, tex_options
);
270 if (lima_debug
& LIMA_DEBUG_PP
)
271 nir_print_shader(nir
, stdout
);
273 if (!ppir_compile_nir(fs
, nir
, screen
->pp_ra
, &ctx
->debug
)) {
278 fs
->uses_discard
= nir
->info
.fs
.uses_discard
;
/* pipe_context::create_fs_state hook (installed by lima_program_init).
 *
 * pctx: the pipe context; converted to the driver context with
 *       lima_context().
 * cso:  shader description; its type is either PIPE_SHADER_IR_NIR or
 *       PIPE_SHADER_IR_TGSI (asserted). TGSI tokens are converted with
 *       tgsi_to_nir.
 *
 * Allocates a zeroed lima_fs_shader_state with rzalloc(NULL, ...), stores
 * the NIR in so->base (type set to PIPE_SHADER_IR_NIR), fills every entry
 * of so->swizzles up to PIPE_MAX_SAMPLERS with the identity swizzle and
 * compiles the shader through lima_fs_compile_shader.
 *
 * NOTE(review): the return type, the tex_options initializer, the branch
 * bodies and the failure path after lima_fs_compile_shader are not visible
 * in this listing -- confirm against the full file. */
285 lima_create_fs_state(struct pipe_context
*pctx
,
286 const struct pipe_shader_state
*cso
)
288 struct lima_context
*ctx
= lima_context(pctx
);
289 struct lima_fs_shader_state
*so
= rzalloc(NULL
, struct lima_fs_shader_state
);
295 if (cso
->type
== PIPE_SHADER_IR_NIR
)
296 /* The backend takes ownership of the NIR shader on state
301 assert(cso
->type
== PIPE_SHADER_IR_TGSI
);
303 nir
= tgsi_to_nir(cso
->tokens
, pctx
->screen
, false);
306 so
->base
.type
= PIPE_SHADER_IR_NIR
;
307 so
->base
.ir
.nir
= nir
;
309 uint8_t identity
[4] = { PIPE_SWIZZLE_X
,
314 struct nir_lower_tex_options tex_options
= {
319 /* Initialize with identity swizzles. That should suffice for most shaders */
320 for (int i
= 0; i
< PIPE_MAX_SAMPLERS
; i
++)
321 memcpy(so
->swizzles
[i
], identity
, 4);
323 if (!lima_fs_compile_shader(ctx
, so
, &tex_options
)) {
/* pipe_context::bind_fs_state hook (installed by lima_program_init).
 *
 * pctx:  the pipe context.
 * hwcso: the fragment shader state object being bound.
 *
 * The visible statement flags LIMA_CONTEXT_DIRTY_SHADER_FRAG in ctx->dirty.
 * NOTE(review): the statement that stores hwcso in the context is not
 * visible in this listing -- confirm against the full file. */
332 lima_bind_fs_state(struct pipe_context
*pctx
, void *hwcso
)
334 struct lima_context
*ctx
= lima_context(pctx
);
337 ctx
->dirty
|= LIMA_CONTEXT_DIRTY_SHADER_FRAG
;
/* pipe_context::delete_fs_state hook (installed by lima_program_init).
 *
 * pctx:  the pipe context (not used by the visible statements).
 * hwcso: the lima_fs_shader_state to destroy.
 *
 * Drops the reference on the shader BO and frees the stored NIR with
 * ralloc_free.
 * NOTE(review): any guard around lima_bo_unreference and the release of
 * `so` itself are not visible in this listing. */
341 lima_delete_fs_state(struct pipe_context
*pctx
, void *hwcso
)
343 struct lima_fs_shader_state
*so
= hwcso
;
346 lima_bo_unreference(so
->bo
);
348 ralloc_free(so
->base
.ir
.nir
);
/* Upload the compiled vertex shader binary of ctx->vs into a buffer object.
 *
 * ctx: context whose bound vertex shader (ctx->vs) is uploaded.
 *
 * Creates a BO of vs->shader_size bytes, copies vs->shader into its mapping
 * and then releases the CPU-side copy with ralloc_free. A message is
 * written to stderr on the BO-creation failure path.
 *
 * NOTE(review): the return type, the conditions guarding the upload and
 * the failure return are not visible in this listing -- confirm against
 * the full file. */
353 lima_update_vs_state(struct lima_context
*ctx
)
355 struct lima_vs_shader_state
*vs
= ctx
->vs
;
357 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
358 vs
->bo
= lima_bo_create(screen
, vs
->shader_size
, 0);
360 fprintf(stderr
, "lima: create vs shader bo fail\n");
364 memcpy(lima_bo_map(vs
->bo
), vs
->shader
, vs
->shader_size
);
365 ralloc_free(vs
->shader
);
/* Bring the bound fragment shader (ctx->fs) up to date before use.
 *
 * ctx: context providing the bound shader, the texture state object and
 *      the dirty flags.
 *
 * When LIMA_CONTEXT_DIRTY_TEXTURES is set and both num_samplers and
 * num_textures are non-zero, each sampler's texel swizzle is looked up from
 * the format of its bound texture resource:
 *  - a swizzle differing from the one cached in fs->swizzles[i] sets
 *    needs_recompile and updates the cache,
 *  - the swizzle is copied into tex_options.swizzles[i], and bit i of
 *    tex_options.swizzle_result is set when it is not the identity,
 *  - cached entries from num_samplers up to PIPE_MAX_SAMPLERS are reset to
 *    identity.
 * If a recompile is needed the old BO reference is dropped and the shader
 * is rebuilt with lima_fs_compile_shader. The shader binary is then copied
 * into a newly created BO of fs->shader_size bytes and the CPU-side copy is
 * freed. Finally the job's pp_max_stack_size is raised to at least
 * ctx->fs->stack_size.
 *
 * NOTE(review): the return type, exact brace nesting, the conditions
 * guarding the upload and the failure returns are not visible in this
 * listing -- confirm against the full file. */
373 lima_update_fs_state(struct lima_context
*ctx
)
375 struct lima_fs_shader_state
*fs
= ctx
->fs
;
376 struct lima_texture_stateobj
*lima_tex
= &ctx
->tex_stateobj
;
377 struct nir_lower_tex_options tex_options
= {
381 bool needs_recompile
= false;
383 /* Check if texture formats have changed since last compilation.
384 * If they have, we need to recompile the shader.
386 if (((ctx
->dirty
& LIMA_CONTEXT_DIRTY_TEXTURES
) &&
387 lima_tex
->num_samplers
&&
388 lima_tex
->num_textures
)) {
389 uint8_t identity
[4] = { PIPE_SWIZZLE_X
,
393 for (int i
= 0; i
< lima_tex
->num_samplers
; i
++) {
394 struct lima_sampler_view
*texture
= lima_sampler_view(lima_tex
->textures
[i
]);
395 struct pipe_resource
*prsc
= texture
->base
.texture
;
396 const uint8_t *swizzle
= lima_format_get_texel_swizzle(prsc
->format
);
397 if (memcmp(fs
->swizzles
[i
], swizzle
, 4)) {
398 needs_recompile
= true;
399 memcpy(fs
->swizzles
[i
], swizzle
, 4);
402 for (int j
= 0; j
< 4; j
++)
403 tex_options
.swizzles
[i
][j
] = swizzle
[j
];
405 if (memcmp(swizzle
, identity
, 4))
406 tex_options
.swizzle_result
|= (1 << i
);
409 /* Fill rest with identity swizzle */
410 for (int i
= lima_tex
->num_samplers
; i
< PIPE_MAX_SAMPLERS
; i
++)
411 memcpy(fs
->swizzles
[i
], identity
, 4);
414 if (needs_recompile
) {
416 lima_bo_unreference(fs
->bo
);
420 if (!lima_fs_compile_shader(ctx
, fs
, &tex_options
))
425 struct lima_screen
*screen
= lima_screen(ctx
->base
.screen
);
426 fs
->bo
= lima_bo_create(screen
, fs
->shader_size
, 0);
428 fprintf(stderr
, "lima: create fs shader bo fail\n");
432 memcpy(lima_bo_map(fs
->bo
), fs
->shader
, fs
->shader_size
);
433 ralloc_free(fs
->shader
);
437 struct lima_job
*job
= lima_job_get(ctx
);
438 job
->pp_max_stack_size
= MAX2(job
->pp_max_stack_size
, ctx
->fs
->stack_size
);
/* pipe_context::create_vs_state hook (installed by lima_program_init).
 *
 * pctx: the pipe context; converted to the driver context with
 *       lima_context().
 * cso:  shader description; its type is either PIPE_SHADER_IR_NIR or
 *       PIPE_SHADER_IR_TGSI (asserted). TGSI tokens are converted with
 *       tgsi_to_nir.
 *
 * Allocates a zeroed lima_vs_shader_state with rzalloc(NULL, ...), runs
 * lima_program_optimize_vs_nir on the NIR, prints it to stdout when
 * LIMA_DEBUG_GP is set in lima_debug, and compiles it with
 * gpir_compile_nir.
 *
 * NOTE(review): the return type, the branch bodies and the failure path
 * after gpir_compile_nir are not visible in this listing -- confirm against
 * the full file. */
444 lima_create_vs_state(struct pipe_context
*pctx
,
445 const struct pipe_shader_state
*cso
)
447 struct lima_context
*ctx
= lima_context(pctx
);
448 struct lima_vs_shader_state
*so
= rzalloc(NULL
, struct lima_vs_shader_state
);
454 if (cso
->type
== PIPE_SHADER_IR_NIR
)
457 assert(cso
->type
== PIPE_SHADER_IR_TGSI
);
459 nir
= tgsi_to_nir(cso
->tokens
, pctx
->screen
, false);
462 lima_program_optimize_vs_nir(nir
);
464 if (lima_debug
& LIMA_DEBUG_GP
)
465 nir_print_shader(nir
, stdout
);
467 if (!gpir_compile_nir(so
, nir
, &ctx
->debug
)) {
/* pipe_context::bind_vs_state hook (installed by lima_program_init).
 *
 * pctx:  the pipe context.
 * hwcso: the vertex shader state object being bound.
 *
 * The visible statement flags LIMA_CONTEXT_DIRTY_SHADER_VERT in ctx->dirty.
 * NOTE(review): the statement that stores hwcso in the context is not
 * visible in this listing -- confirm against the full file. */
478 lima_bind_vs_state(struct pipe_context
*pctx
, void *hwcso
)
480 struct lima_context
*ctx
= lima_context(pctx
);
483 ctx
->dirty
|= LIMA_CONTEXT_DIRTY_SHADER_VERT
;
/* pipe_context::delete_vs_state hook (installed by lima_program_init).
 *
 * pctx:  the pipe context (not used by the visible statements).
 * hwcso: the lima_vs_shader_state to destroy.
 *
 * Drops the reference on the shader BO.
 * NOTE(review): any guard around lima_bo_unreference and the release of
 * `so` itself are not visible in this listing. */
487 lima_delete_vs_state(struct pipe_context
*pctx
, void *hwcso
)
489 struct lima_vs_shader_state
*so
= hwcso
;
492 lima_bo_unreference(so
->bo
);
/* Install the shader-state callbacks on a lima context.
 *
 * ctx: context whose embedded pipe_context (ctx->base) receives the
 *      create/bind/delete hooks for fragment and vertex shader state
 *      defined in this file. */
498 lima_program_init(struct lima_context
*ctx
)
500 ctx
->base
.create_fs_state
= lima_create_fs_state
;
501 ctx
->base
.bind_fs_state
= lima_bind_fs_state
;
502 ctx
->base
.delete_fs_state
= lima_delete_fs_state
;
504 ctx
->base
.create_vs_state
= lima_create_vs_state
;
505 ctx
->base
.bind_vs_state
= lima_bind_vs_state
;
506 ctx
->base
.delete_vs_state
= lima_delete_vs_state
;