2 * Copyright © 2014-2017 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "util/u_format.h"
26 #include "util/u_math.h"
27 #include "util/u_memory.h"
28 #include "util/ralloc.h"
29 #include "util/hash_table.h"
30 #include "tgsi/tgsi_dump.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "compiler/nir/nir.h"
33 #include "compiler/nir/nir_builder.h"
34 #include "nir/tgsi_to_nir.h"
35 #include "compiler/v3d_compiler.h"
36 #include "vc5_context.h"
37 #include "broadcom/cle/v3d_packet_v33_pack.h"
/**
 * Maps a driver_location back to the varying slot of the shader output that
 * uses it.
 *
 * Walks the output variables of NIR shader `s` and, for the first variable
 * whose data.driver_location equals `driver_location`, returns that
 * variable's data.location.
 *
 * NOTE(review): the closing braces and whatever is returned when no output
 * matches are not visible in this listing -- confirm against the real file.
 */
39 static gl_varying_slot
40 vc5_get_slot_for_driver_location(nir_shader
*s
, uint32_t driver_location
)
42 nir_foreach_variable(var
, &s
->outputs
) {
43 if (var
->data
.driver_location
== driver_location
) {
44 return var
->data
.location
;
/**
 * Derives the transform-feedback bookkeeping stored on `so` from the
 * stream-output description supplied at shader creation.
 *
 * Visible steps:
 *  - tests stream_output->num_outputs for zero first (the statement guarded
 *    by that test is not visible in this listing);
 *  - for each buffer index below PIPE_MAX_SO_BUFFERS, visits the outputs
 *    whose output_buffer matches, asserts that dst_offset has not gone
 *    backwards relative to buffer_offset, generates VARYING_SLOT_POS
 *    component 0 slots for the gap up to dst_offset, and generates one slot
 *    per component of the output, using vc5_get_slot_for_driver_location()
 *    to turn register_index into a varying slot;
 *  - packs a V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC for the buffer into
 *    the next so->tf_specs entry (so->num_tf_specs is post-incremented);
 *  - finally records slot_count in so->num_tf_outputs and copies the local
 *    slots[] array into a ralloc_array() parented to the NIR shader.
 *
 * NOTE(review): the declaration of slot_count, the stores into slots[], the
 * updates of slot_count/buffer_offset and the last argument of the pack call
 * are missing from this listing (the embedded source numbers skip) --
 * confirm against the real file before relying on details.
 */
52 vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader
*so
,
53 const struct pipe_stream_output_info
*stream_output
)
55 if (!stream_output
->num_outputs
)
58 struct v3d_varying_slot slots
[PIPE_MAX_SO_OUTPUTS
* 4];
/* One pass per stream-output buffer; vpm_start remembers how many slots
 * had been produced before this buffer's outputs were visited. */
61 for (int buffer
= 0; buffer
< PIPE_MAX_SO_BUFFERS
; buffer
++) {
62 uint32_t buffer_offset
= 0;
63 uint32_t vpm_start
= slot_count
;
65 for (int i
= 0; i
< stream_output
->num_outputs
; i
++) {
66 const struct pipe_stream_output
*output
=
67 &stream_output
->output
[i
];
69 if (output
->output_buffer
!= buffer
)
72 /* We assume that the SO outputs appear in increasing
73 * order in the buffer.
75 assert(output
->dst_offset
>= buffer_offset
);
77 /* Pad any undefined slots in the output */
78 for (int j
= buffer_offset
; j
< output
->dst_offset
; j
++) {
80 v3d_slot_from_slot_and_component(VARYING_SLOT_POS
, 0);
85 /* Set the coordinate shader up to output the
86 * components of this varying.
88 for (int j
= 0; j
< output
->num_components
; j
++) {
89 gl_varying_slot slot
=
90 vc5_get_slot_for_driver_location(so
->base
.ir
.nir
, output
->register_index
);
93 v3d_slot_from_slot_and_component(slot
,
94 output
->start_component
+ j
);
100 uint32_t vpm_size
= slot_count
- vpm_start
;
104 struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked
= {
105 /* We need the offset from the coordinate shader's VPM
106 * output block, which has the [X, Y, Z, W, Xs, Ys]
107 * values at the start. Note that this will need some
108 * shifting when PSIZ is also present.
110 .first_shaded_vertex_value_to_output
= vpm_start
+ 6,
111 .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1
= vpm_size
- 1,
112 .output_buffer_to_write_to
= buffer
,
114 V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL
,
115 (void *)&so
->tf_specs
[so
->num_tf_specs
++],
119 so
->num_tf_outputs
= slot_count
;
120 so
->tf_outputs
= ralloc_array(so
->base
.ir
.nir
, struct v3d_varying_slot
,
122 memcpy(so
->tf_outputs
, slots
, sizeof(*slots
) * slot_count
);
/**
 * Size callback handed to nir_lower_io: returns
 * glsl_count_attribute_slots(type, false) for the given GLSL type.
 */
126 type_size(const struct glsl_type
*type
)
128 return glsl_count_attribute_slots(type
, false);
/**
 * Shader-state create hook (installed for both VS and FS by
 * vc5_program_init).
 *
 * Visible steps:
 *  - allocates a zeroed vc5_uncompiled_shader with CALLOC_STRUCT and gives
 *    it the context's next_uncompiled_program_id (post-incremented);
 *  - when cso->type is PIPE_SHADER_IR_NIR, runs nir_lower_io over
 *    nir_var_all using type_size(); otherwise asserts the type is
 *    PIPE_SHADER_IR_TGSI, optionally dumps the tokens to stderr when
 *    V3D_DEBUG_TGSI is set, and converts them with tgsi_to_nir();
 *  - runs a series of NIR passes (global-to-local, regs-to-SSA, cubemap
 *    coordinate normalisation, load_const scalarisation, dead local
 *    variable removal);
 *  - marks so->base.type as PIPE_SHADER_IR_NIR and derives the
 *    transform-feedback outputs from cso->stream_output;
 *  - prints the NIR to stderr when V3D_DEBUG_NIR or the per-stage debug
 *    flag is set.
 *
 * NOTE(review): several lines are missing from this listing (the embedded
 * source numbers skip), including where `s` is defined, any check of the
 * allocation result, and the return statement -- confirm against the real
 * file.
 */
132 vc5_shader_state_create(struct pipe_context
*pctx
,
133 const struct pipe_shader_state
*cso
)
135 struct vc5_context
*vc5
= vc5_context(pctx
);
136 struct vc5_uncompiled_shader
*so
= CALLOC_STRUCT(vc5_uncompiled_shader
);
140 so
->program_id
= vc5
->next_uncompiled_program_id
++;
144 if (cso
->type
== PIPE_SHADER_IR_NIR
) {
145 /* The backend takes ownership of the NIR shader on state
150 NIR_PASS_V(s
, nir_lower_io
, nir_var_all
, type_size
,
151 (nir_lower_io_options
)0);
153 assert(cso
->type
== PIPE_SHADER_IR_TGSI
);
155 if (V3D_DEBUG
& V3D_DEBUG_TGSI
) {
156 fprintf(stderr
, "prog %d TGSI:\n",
158 tgsi_dump(cso
->tokens
, 0);
159 fprintf(stderr
, "\n");
161 s
= tgsi_to_nir(cso
->tokens
, &v3d_nir_options
);
164 NIR_PASS_V(s
, nir_opt_global_to_local
);
165 NIR_PASS_V(s
, nir_lower_regs_to_ssa
);
166 NIR_PASS_V(s
, nir_normalize_cubemap_coords
);
168 NIR_PASS_V(s
, nir_lower_load_const_to_scalar
);
172 NIR_PASS_V(s
, nir_remove_dead_variables
, nir_var_local
);
174 /* Garbage collect dead instructions */
177 so
->base
.type
= PIPE_SHADER_IR_NIR
;
180 vc5_set_transform_feedback_outputs(so
, &cso
->stream_output
);
182 if (V3D_DEBUG
& (V3D_DEBUG_NIR
|
183 v3d_debug_flag_for_shader_stage(s
->info
.stage
))) {
184 fprintf(stderr
, "%s prog %d NIR:\n",
185 gl_shader_stage_name(s
->info
.stage
),
187 nir_print_shader(s
, stderr
);
188 fprintf(stderr
, "\n");
/**
 * Looks up, and when necessary builds, the compiled variant described by
 * `key`.
 *
 * Visible steps:
 *  - takes the uncompiled shader and its NIR from key->shader_state;
 *  - sets key_size to sizeof(struct v3d_fs_key) for fragment shaders and
 *    sizeof(struct v3d_vs_key) otherwise, then searches hash table `ht`
 *    for the key;
 *  - allocates a vc5_compiled_shader with rzalloc, bumps the uncompiled
 *    shader's compiled_variant_count atomically, and compiles with
 *    v3d_compile_vs() or v3d_compile_fs() according to the stage (any
 *    other stage hits unreachable("bad stage")), with prog_data allocated
 *    as a ralloc child of the compiled shader;
 *  - updates the uniform dirty flags, allocates a BO of shader_size bytes
 *    named "shader", maps it and copies the QPU instructions in;
 *  - duplicates the key into memory parented to the compiled shader and
 *    inserts (dup_key -> shader) into `ht`.
 *
 * NOTE(review): the assignment of `ht`, the handling of a cache hit after
 * the search, the trailing arguments of the compile calls and the return
 * statement are not visible in this listing -- confirm against the real
 * file.
 */
194 static struct vc5_compiled_shader
*
195 vc5_get_compiled_shader(struct vc5_context
*vc5
, struct v3d_key
*key
)
197 struct vc5_uncompiled_shader
*shader_state
= key
->shader_state
;
198 nir_shader
*s
= shader_state
->base
.ir
.nir
;
200 struct hash_table
*ht
;
202 if (s
->info
.stage
== MESA_SHADER_FRAGMENT
) {
204 key_size
= sizeof(struct v3d_fs_key
);
207 key_size
= sizeof(struct v3d_vs_key
);
210 struct hash_entry
*entry
= _mesa_hash_table_search(ht
, key
);
214 struct vc5_compiled_shader
*shader
=
215 rzalloc(NULL
, struct vc5_compiled_shader
);
217 int program_id
= shader_state
->program_id
;
219 p_atomic_inc_return(&shader_state
->compiled_variant_count
);
221 uint32_t shader_size
;
223 switch (s
->info
.stage
) {
224 case MESA_SHADER_VERTEX
:
225 shader
->prog_data
.vs
= rzalloc(shader
, struct v3d_vs_prog_data
);
227 qpu_insts
= v3d_compile_vs(vc5
->screen
->compiler
,
228 (struct v3d_vs_key
*)key
,
229 shader
->prog_data
.vs
, s
,
230 program_id
, variant_id
,
233 case MESA_SHADER_FRAGMENT
:
234 shader
->prog_data
.fs
= rzalloc(shader
, struct v3d_fs_prog_data
);
236 qpu_insts
= v3d_compile_fs(vc5
->screen
->compiler
,
237 (struct v3d_fs_key
*)key
,
238 shader
->prog_data
.fs
, s
,
239 program_id
, variant_id
,
243 unreachable("bad stage");
246 vc5_set_shader_uniform_dirty_flags(shader
);
248 shader
->bo
= vc5_bo_alloc(vc5
->screen
, shader_size
, "shader");
249 vc5_bo_map(shader
->bo
);
250 memcpy(shader
->bo
->map
, qpu_insts
, shader_size
);
254 struct vc5_key
*dup_key
;
255 dup_key
= ralloc_size(shader
, key_size
);
256 memcpy(dup_key
, key
, key_size
);
257 _mesa_hash_table_insert(ht
, dup_key
, shader
);
/**
 * Fills the stage-independent part of a shader key from context state.
 *
 * For each of the texstate->num_textures entries it reads the sampler view
 * and sampler state and writes key->tex[i]:
 *  - return_size from vc5_get_tex_return_size();
 *  - return_channels: 2 when return_size is 16, 4 when devinfo->ver > 40,
 *    and vc5_get_tex_return_channels() in the remaining case;
 *  - swizzle: copied from the sampler view when return_size is 32 and
 *    devinfo->ver < 40; the X/Y/Z/W assignments that follow appear to be
 *    the alternative branch (the `else` line is not visible);
 *  - msaa_width/msaa_height from the texture when nr_samples > 1;
 *  - compare_mode and compare_func from the sampler state;
 *  - clamp_s/t/r set when the matching wrap mode is PIPE_TEX_WRAP_CLAMP.
 * Finally key->ucp_enables is taken from the rasterizer's
 * clip_plane_enable.
 *
 * NOTE(review): embedded source lines 273-276 and 279 are missing from this
 * listing, so any NULL-sampler handling and the middle argument of
 * vc5_get_tex_return_size() cannot be seen here -- confirm against the real
 * file.
 */
263 vc5_setup_shared_key(struct vc5_context
*vc5
, struct v3d_key
*key
,
264 struct vc5_texture_stateobj
*texstate
)
266 const struct v3d_device_info
*devinfo
= &vc5
->screen
->devinfo
;
268 for (int i
= 0; i
< texstate
->num_textures
; i
++) {
269 struct pipe_sampler_view
*sampler
= texstate
->textures
[i
];
270 struct vc5_sampler_view
*vc5_sampler
= vc5_sampler_view(sampler
);
271 struct pipe_sampler_state
*sampler_state
=
272 texstate
->samplers
[i
];
277 key
->tex
[i
].return_size
=
278 vc5_get_tex_return_size(devinfo
,
280 sampler_state
->compare_mode
);
282 /* For 16-bit, we set up the sampler to always return 2
283 * channels (meaning no recompiles for most statechanges),
284 * while for 32 we actually scale the returns with channels.
286 if (key
->tex
[i
].return_size
== 16) {
287 key
->tex
[i
].return_channels
= 2;
288 } else if (devinfo
->ver
> 40) {
289 key
->tex
[i
].return_channels
= 4;
291 key
->tex
[i
].return_channels
=
292 vc5_get_tex_return_channels(devinfo
,
296 if (key
->tex
[i
].return_size
== 32 && devinfo
->ver
< 40) {
297 memcpy(key
->tex
[i
].swizzle
,
298 vc5_sampler
->swizzle
,
299 sizeof(vc5_sampler
->swizzle
));
301 /* For 16-bit returns, we let the sampler state handle
304 key
->tex
[i
].swizzle
[0] = PIPE_SWIZZLE_X
;
305 key
->tex
[i
].swizzle
[1] = PIPE_SWIZZLE_Y
;
306 key
->tex
[i
].swizzle
[2] = PIPE_SWIZZLE_Z
;
307 key
->tex
[i
].swizzle
[3] = PIPE_SWIZZLE_W
;
310 if (sampler
->texture
->nr_samples
> 1) {
311 key
->tex
[i
].msaa_width
= sampler
->texture
->width0
;
312 key
->tex
[i
].msaa_height
= sampler
->texture
->height0
;
314 key
->tex
[i
].compare_mode
= sampler_state
->compare_mode
;
315 key
->tex
[i
].compare_func
= sampler_state
->compare_func
;
316 key
->tex
[i
].clamp_s
=
317 sampler_state
->wrap_s
== PIPE_TEX_WRAP_CLAMP
;
318 key
->tex
[i
].clamp_t
=
319 sampler_state
->wrap_t
== PIPE_TEX_WRAP_CLAMP
;
320 key
->tex
[i
].clamp_r
=
321 sampler_state
->wrap_r
== PIPE_TEX_WRAP_CLAMP
;
325 key
->ucp_enables
= vc5
->rasterizer
->base
.clip_plane_enable
;
/**
 * Rebuilds the fragment-shader key from current context state and selects
 * the matching compiled fragment shader.
 *
 * Visible steps:
 *  - tests vc5->dirty against a set of VC5_DIRTY_* bits (prim mode,
 *    framebuffer, rasterizer, sample mask, uncompiled FS, plus others whose
 *    lines are missing here); the statement guarded by that test is not
 *    visible;
 *  - zeroes the local key, fills the shared part from vc5->fragtex via
 *    vc5_setup_shared_key(), and points it at vc5->prog.bind_fs;
 *  - derives the FS-specific fields: is_points / is_lines from prim_mode,
 *    clamp_color, logicop_func (PIPE_LOGICOP_COPY is the other visible
 *    value), msaa, sample_coverage, alpha-to-coverage / alpha-to-one,
 *    depth_enabled, alpha test, nr_cbufs, swap_color_rb, one f32_color_rb
 *    bit per colour buffer whose first channel is a 32-bit float,
 *    point-sprite fields when drawing points, light_twoside and
 *    shade_model_flat;
 *  - fetches the variant with vc5_get_compiled_shader(), compares it with
 *    the previous vc5->prog.fs, and raises VC5_DIRTY_COMPILED_FS,
 *    VC5_DIRTY_FLAT_SHADE_FLAGS (flat_shade_flags differ) and
 *    VC5_DIRTY_FS_INPUTS (input_slots differ from the old shader's).
 *
 * NOTE(review): several lines are missing from this listing (the embedded
 * source numbers skip), including early returns and `else` lines -- confirm
 * against the real file.
 */
329 vc5_update_compiled_fs(struct vc5_context
*vc5
, uint8_t prim_mode
)
331 struct vc5_job
*job
= vc5
->job
;
332 struct v3d_fs_key local_key
;
333 struct v3d_fs_key
*key
= &local_key
;
335 if (!(vc5
->dirty
& (VC5_DIRTY_PRIM_MODE
|
337 VC5_DIRTY_FRAMEBUFFER
|
339 VC5_DIRTY_RASTERIZER
|
340 VC5_DIRTY_SAMPLE_MASK
|
342 VC5_DIRTY_UNCOMPILED_FS
))) {
/* The key is hashed and compared as raw bytes (see fs_cache_hash and
 * fs_cache_compare), so it is cleared in full before being filled in. */
346 memset(key
, 0, sizeof(*key
));
347 vc5_setup_shared_key(vc5
, &key
->base
, &vc5
->fragtex
);
348 key
->base
.shader_state
= vc5
->prog
.bind_fs
;
349 key
->is_points
= (prim_mode
== PIPE_PRIM_POINTS
);
350 key
->is_lines
= (prim_mode
>= PIPE_PRIM_LINES
&&
351 prim_mode
<= PIPE_PRIM_LINE_STRIP
);
352 key
->clamp_color
= vc5
->rasterizer
->base
.clamp_fragment_color
;
353 if (vc5
->blend
->logicop_enable
) {
354 key
->logicop_func
= vc5
->blend
->logicop_func
;
356 key
->logicop_func
= PIPE_LOGICOP_COPY
;
359 key
->msaa
= vc5
->rasterizer
->base
.multisample
;
360 key
->sample_coverage
= (vc5
->rasterizer
->base
.multisample
&&
361 vc5
->sample_mask
!= (1 << VC5_MAX_SAMPLES
) - 1);
362 key
->sample_alpha_to_coverage
= vc5
->blend
->alpha_to_coverage
;
363 key
->sample_alpha_to_one
= vc5
->blend
->alpha_to_one
;
366 key
->depth_enabled
= (vc5
->zsa
->base
.depth
.enabled
||
367 vc5
->zsa
->base
.stencil
[0].enabled
);
368 if (vc5
->zsa
->base
.alpha
.enabled
) {
369 key
->alpha_test
= true;
370 key
->alpha_test_func
= vc5
->zsa
->base
.alpha
.func
;
373 /* gl_FragColor's propagation to however many bound color buffers
374 * there are means that the buffer count needs to be in the key.
376 key
->nr_cbufs
= vc5
->framebuffer
.nr_cbufs
;
377 key
->swap_color_rb
= vc5
->swap_color_rb
;
379 for (int i
= 0; i
< key
->nr_cbufs
; i
++) {
380 struct pipe_surface
*cbuf
= vc5
->framebuffer
.cbufs
[i
];
381 const struct util_format_description
*desc
=
382 util_format_description(cbuf
->format
);
384 if (desc
->channel
[0].type
== UTIL_FORMAT_TYPE_FLOAT
&&
385 desc
->channel
[0].size
== 32) {
386 key
->f32_color_rb
|= 1 << i
;
390 if (key
->is_points
) {
391 key
->point_sprite_mask
=
392 vc5
->rasterizer
->base
.sprite_coord_enable
;
393 key
->point_coord_upper_left
=
394 (vc5
->rasterizer
->base
.sprite_coord_mode
==
395 PIPE_SPRITE_COORD_UPPER_LEFT
);
398 key
->light_twoside
= vc5
->rasterizer
->base
.light_twoside
;
399 key
->shade_model_flat
= vc5
->rasterizer
->base
.flatshade
;
401 struct vc5_compiled_shader
*old_fs
= vc5
->prog
.fs
;
402 vc5
->prog
.fs
= vc5_get_compiled_shader(vc5
, &key
->base
);
403 if (vc5
->prog
.fs
== old_fs
)
406 vc5
->dirty
|= VC5_DIRTY_COMPILED_FS
;
409 vc5
->prog
.fs
->prog_data
.fs
->flat_shade_flags
!=
410 old_fs
->prog_data
.fs
->flat_shade_flags
) {
411 vc5
->dirty
|= VC5_DIRTY_FLAT_SHADE_FLAGS
;
414 if (old_fs
&& memcmp(vc5
->prog
.fs
->prog_data
.fs
->input_slots
,
415 old_fs
->prog_data
.fs
->input_slots
,
416 sizeof(vc5
->prog
.fs
->prog_data
.fs
->input_slots
))) {
417 vc5
->dirty
|= VC5_DIRTY_FS_INPUTS
;
/**
 * Rebuilds the vertex-shader key from current context state and selects
 * two compiled variants from it: the regular vertex shader and the
 * coordinate shader (the same key with is_coord set).
 *
 * Visible steps:
 *  - tests vc5->dirty against VC5_DIRTY_PRIM_MODE, VC5_DIRTY_RASTERIZER,
 *    VC5_DIRTY_UNCOMPILED_VS, VC5_DIRTY_FS_INPUTS (and others whose lines
 *    are missing here); the statement guarded by that test is not visible;
 *  - zeroes the key, fills the shared part from vc5->verttex, points it at
 *    vc5->prog.bind_vs, and copies num_inputs / input_slots from the
 *    currently selected fragment shader (vc5->prog.fs) into
 *    num_fs_inputs / fs_inputs;
 *  - sets clamp_color and per_vertex_point_size (points only);
 *  - fetches the variant and raises VC5_DIRTY_COMPILED_VS when it differs
 *    from vc5->prog.vs;
 *  - sets is_coord, replaces fs_inputs with the shader's tf_outputs,
 *    memsets the entries between num_tf_outputs and the old num_fs_inputs,
 *    sets num_fs_inputs to num_tf_outputs, fetches that variant and raises
 *    VC5_DIRTY_COMPILED_CS when it differs from vc5->prog.cs.
 *
 * NOTE(review): the stores to vc5->prog.vs / vc5->prog.cs and the fill
 * value passed to memset are on lines missing from this listing -- confirm
 * against the real file.
 */
422 vc5_update_compiled_vs(struct vc5_context
*vc5
, uint8_t prim_mode
)
424 struct v3d_vs_key local_key
;
425 struct v3d_vs_key
*key
= &local_key
;
427 if (!(vc5
->dirty
& (VC5_DIRTY_PRIM_MODE
|
428 VC5_DIRTY_RASTERIZER
|
431 VC5_DIRTY_UNCOMPILED_VS
|
432 VC5_DIRTY_FS_INPUTS
))) {
/* The key is hashed and compared as raw bytes (see vs_cache_hash and
 * vs_cache_compare), so it is cleared in full before being filled in. */
436 memset(key
, 0, sizeof(*key
));
437 vc5_setup_shared_key(vc5
, &key
->base
, &vc5
->verttex
);
438 key
->base
.shader_state
= vc5
->prog
.bind_vs
;
439 key
->num_fs_inputs
= vc5
->prog
.fs
->prog_data
.fs
->base
.num_inputs
;
440 STATIC_ASSERT(sizeof(key
->fs_inputs
) ==
441 sizeof(vc5
->prog
.fs
->prog_data
.fs
->input_slots
));
442 memcpy(key
->fs_inputs
, vc5
->prog
.fs
->prog_data
.fs
->input_slots
,
443 sizeof(key
->fs_inputs
));
444 key
->clamp_color
= vc5
->rasterizer
->base
.clamp_vertex_color
;
446 key
->per_vertex_point_size
=
447 (prim_mode
== PIPE_PRIM_POINTS
&&
448 vc5
->rasterizer
->base
.point_size_per_vertex
);
450 struct vc5_compiled_shader
*vs
=
451 vc5_get_compiled_shader(vc5
, &key
->base
);
452 if (vs
!= vc5
->prog
.vs
) {
454 vc5
->dirty
|= VC5_DIRTY_COMPILED_VS
;
/* The same key is now reused with is_coord set to select the variant
 * that is compared against vc5->prog.cs below. */
457 key
->is_coord
= true;
458 /* Coord shaders only output varyings used by transform feedback. */
459 struct vc5_uncompiled_shader
*shader_state
= key
->base
.shader_state
;
460 memcpy(key
->fs_inputs
, shader_state
->tf_outputs
,
461 sizeof(*key
->fs_inputs
) * shader_state
->num_tf_outputs
);
462 if (shader_state
->num_tf_outputs
< key
->num_fs_inputs
) {
463 memset(&key
->fs_inputs
[shader_state
->num_tf_outputs
],
465 sizeof(*key
->fs_inputs
) * (key
->num_fs_inputs
-
466 shader_state
->num_tf_outputs
));
468 key
->num_fs_inputs
= shader_state
->num_tf_outputs
;
470 struct vc5_compiled_shader
*cs
=
471 vc5_get_compiled_shader(vc5
, &key
->base
);
472 if (cs
!= vc5
->prog
.cs
) {
474 vc5
->dirty
|= VC5_DIRTY_COMPILED_CS
;
/**
 * Refreshes the compiled fragment shader and then the compiled vertex
 * shader variants for the given primitive mode.
 *
 * The fragment shader is handled first: vc5_update_compiled_vs() reads
 * vc5->prog.fs to build its key and checks VC5_DIRTY_FS_INPUTS, which
 * vc5_update_compiled_fs() may have just raised.
 */
479 vc5_update_compiled_shaders(struct vc5_context
*vc5
, uint8_t prim_mode
)
481 vc5_update_compiled_fs(vc5
, prim_mode
);
482 vc5_update_compiled_vs(vc5
, prim_mode
);
/**
 * Hash callback for the fragment-shader cache: hashes the first
 * sizeof(struct v3d_fs_key) bytes at `key` with _mesa_hash_data().
 */
486 fs_cache_hash(const void *key
)
488 return _mesa_hash_data(key
, sizeof(struct v3d_fs_key
));
/**
 * Hash callback for the vertex-shader cache: hashes the first
 * sizeof(struct v3d_vs_key) bytes at `key` with _mesa_hash_data().
 */
492 vs_cache_hash(const void *key
)
494 return _mesa_hash_data(key
, sizeof(struct v3d_vs_key
));
/**
 * Equality callback for the fragment-shader cache: true when the two keys
 * are byte-for-byte identical over sizeof(struct v3d_fs_key).
 */
498 fs_cache_compare(const void *key1
, const void *key2
)
500 return memcmp(key1
, key2
, sizeof(struct v3d_fs_key
)) == 0;
/**
 * Equality callback for the vertex-shader cache: true when the two keys
 * are byte-for-byte identical over sizeof(struct v3d_vs_key).
 */
504 vs_cache_compare(const void *key1
, const void *key2
)
506 return memcmp(key1
, key2
, sizeof(struct v3d_vs_key
)) == 0;
/**
 * Drops one cache entry if it was compiled from the uncompiled shader `so`.
 *
 * When the entry's key has shader_state == so, the entry is removed from
 * `ht`, the compiled shader's BO is unreferenced, and *last_compile is
 * reset to NULL if it pointed at that compiled shader, so the caller's
 * "currently selected" pointer does not keep referring to it.
 *
 * NOTE(review): lines after the last visible statement are missing from
 * this listing, so how the compiled shader object itself is released is
 * not shown -- confirm against the real file.
 */
510 delete_from_cache_if_matches(struct hash_table
*ht
,
511 struct vc5_compiled_shader
**last_compile
,
512 struct hash_entry
*entry
,
513 struct vc5_uncompiled_shader
*so
)
515 const struct v3d_key
*key
= entry
->key
;
517 if (key
->shader_state
== so
) {
518 struct vc5_compiled_shader
*shader
= entry
->data
;
519 _mesa_hash_table_remove(ht
, entry
);
520 vc5_bo_unreference(&shader
->bo
);
522 if (shader
== *last_compile
)
523 *last_compile
= NULL
;
/**
 * Shader-state delete hook (installed for both VS and FS by
 * vc5_program_init).
 *
 * Walks the fragment-shader cache and then the vertex-shader cache, calling
 * delete_from_cache_if_matches() on every entry with &vc5->prog.fs and
 * &vc5->prog.vs respectively as the "last compile" pointer, then frees the
 * uncompiled shader's NIR with ralloc_free().
 *
 * NOTE(review): the trailing arguments of the two helper calls and anything
 * after the ralloc_free() (such as freeing `so` itself) are on lines
 * missing from this listing -- confirm against the real file.
 */
530 vc5_shader_state_delete(struct pipe_context
*pctx
, void *hwcso
)
532 struct vc5_context
*vc5
= vc5_context(pctx
);
533 struct vc5_uncompiled_shader
*so
= hwcso
;
535 struct hash_entry
*entry
;
536 hash_table_foreach(vc5
->fs_cache
, entry
) {
537 delete_from_cache_if_matches(vc5
->fs_cache
, &vc5
->prog
.fs
,
540 hash_table_foreach(vc5
->vs_cache
, entry
) {
541 delete_from_cache_if_matches(vc5
->vs_cache
, &vc5
->prog
.vs
,
545 ralloc_free(so
->base
.ir
.nir
);
/**
 * Fragment-shader bind hook: records `hwcso` as the bound uncompiled
 * fragment shader and raises VC5_DIRTY_UNCOMPILED_FS so that
 * vc5_update_compiled_fs() re-selects a variant.
 */
550 vc5_fp_state_bind(struct pipe_context
*pctx
, void *hwcso
)
552 struct vc5_context
*vc5
= vc5_context(pctx
);
553 vc5
->prog
.bind_fs
= hwcso
;
554 vc5
->dirty
|= VC5_DIRTY_UNCOMPILED_FS
;
/**
 * Vertex-shader bind hook: records `hwcso` as the bound uncompiled vertex
 * shader and raises VC5_DIRTY_UNCOMPILED_VS so that
 * vc5_update_compiled_vs() re-selects its variants.
 */
558 vc5_vp_state_bind(struct pipe_context
*pctx
, void *hwcso
)
560 struct vc5_context
*vc5
= vc5_context(pctx
);
561 vc5
->prog
.bind_vs
= hwcso
;
562 vc5
->dirty
|= VC5_DIRTY_UNCOMPILED_VS
;
/**
 * Installs the shader-state hooks on the pipe context and creates the two
 * compiled-shader caches.
 *
 * Vertex and fragment shaders share vc5_shader_state_create() and
 * vc5_shader_state_delete(); each stage has its own bind hook. The
 * fs_cache and vs_cache hash tables are created with `pctx` as their first
 * argument and fs_cache_hash / vs_cache_hash as hash callbacks.
 *
 * NOTE(review): the final argument of each _mesa_hash_table_create() call
 * is on a line missing from this listing (presumably the matching
 * *_cache_compare callback) -- confirm against the real file.
 */
566 vc5_program_init(struct pipe_context
*pctx
)
568 struct vc5_context
*vc5
= vc5_context(pctx
);
570 pctx
->create_vs_state
= vc5_shader_state_create
;
571 pctx
->delete_vs_state
= vc5_shader_state_delete
;
573 pctx
->create_fs_state
= vc5_shader_state_create
;
574 pctx
->delete_fs_state
= vc5_shader_state_delete
;
576 pctx
->bind_fs_state
= vc5_fp_state_bind
;
577 pctx
->bind_vs_state
= vc5_vp_state_bind
;
579 vc5
->fs_cache
= _mesa_hash_table_create(pctx
, fs_cache_hash
,
581 vc5
->vs_cache
= _mesa_hash_table_create(pctx
, vs_cache_hash
,
/**
 * Tears down both compiled-shader caches: for every entry in fs_cache and
 * then vs_cache, unreferences the compiled shader's BO and removes the
 * entry from its table.
 *
 * NOTE(review): embedded source lines 594 and 601 are missing from this
 * listing, so any additional per-shader release between the unreference
 * and the removal is not shown -- confirm against the real file.
 */
586 vc5_program_fini(struct pipe_context
*pctx
)
588 struct vc5_context
*vc5
= vc5_context(pctx
);
590 struct hash_entry
*entry
;
591 hash_table_foreach(vc5
->fs_cache
, entry
) {
592 struct vc5_compiled_shader
*shader
= entry
->data
;
593 vc5_bo_unreference(&shader
->bo
);
595 _mesa_hash_table_remove(vc5
->fs_cache
, entry
);
598 hash_table_foreach(vc5
->vs_cache
, entry
) {
599 struct vc5_compiled_shader
*shader
= entry
->data
;
600 vc5_bo_unreference(&shader
->bo
);
602 _mesa_hash_table_remove(vc5
->vs_cache
, entry
);