2 * Copyright © 2014-2017 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "util/format/u_format.h"
26 #include "util/u_math.h"
27 #include "util/u_memory.h"
28 #include "util/ralloc.h"
29 #include "util/hash_table.h"
30 #include "util/u_upload_mgr.h"
31 #include "tgsi/tgsi_dump.h"
32 #include "tgsi/tgsi_parse.h"
33 #include "compiler/nir/nir.h"
34 #include "compiler/nir/nir_builder.h"
35 #include "nir/tgsi_to_nir.h"
36 #include "compiler/v3d_compiler.h"
37 #include "v3d_context.h"
38 #include "broadcom/cle/v3d_packet_v33_pack.h"
40 static struct v3d_compiled_shader
*
41 v3d_get_compiled_shader(struct v3d_context
*v3d
,
42 struct v3d_key
*key
, size_t key_size
);
44 v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader
*uncompiled
,
47 static gl_varying_slot
48 v3d_get_slot_for_driver_location(nir_shader
*s
, uint32_t driver_location
)
50 nir_foreach_variable(var
, &s
->outputs
) {
51 if (var
->data
.driver_location
== driver_location
) {
52 return var
->data
.location
;
60 * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader.
62 * A shader can have 16 of these specs, and each one of them can write up to
63 * 16 dwords. Since we allow a total of 64 transform feedback output
64 * components (not 16 vectors), we have to group the writes of multiple
65 * varyings together in a single data spec.
68 v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader
*so
,
69 const struct pipe_stream_output_info
*stream_output
)
71 if (!stream_output
->num_outputs
)
74 struct v3d_varying_slot slots
[PIPE_MAX_SO_OUTPUTS
* 4];
77 for (int buffer
= 0; buffer
< PIPE_MAX_SO_BUFFERS
; buffer
++) {
78 uint32_t buffer_offset
= 0;
79 uint32_t vpm_start
= slot_count
;
81 for (int i
= 0; i
< stream_output
->num_outputs
; i
++) {
82 const struct pipe_stream_output
*output
=
83 &stream_output
->output
[i
];
85 if (output
->output_buffer
!= buffer
)
88 /* We assume that the SO outputs appear in increasing
89 * order in the buffer.
91 assert(output
->dst_offset
>= buffer_offset
);
93 /* Pad any undefined slots in the output */
94 for (int j
= buffer_offset
; j
< output
->dst_offset
; j
++) {
96 v3d_slot_from_slot_and_component(VARYING_SLOT_POS
, 0);
101 /* Set the coordinate shader up to output the
102 * components of this varying.
104 for (int j
= 0; j
< output
->num_components
; j
++) {
105 gl_varying_slot slot
=
106 v3d_get_slot_for_driver_location(so
->base
.ir
.nir
, output
->register_index
);
109 v3d_slot_from_slot_and_component(slot
,
110 output
->start_component
+ j
);
116 uint32_t vpm_size
= slot_count
- vpm_start
;
120 uint32_t vpm_start_offset
= vpm_start
+ 6;
123 uint32_t write_size
= MIN2(vpm_size
, 1 << 4);
125 struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked
= {
126 /* We need the offset from the coordinate shader's VPM
127 * output block, which has the [X, Y, Z, W, Xs, Ys]
128 * values at the start.
130 .first_shaded_vertex_value_to_output
= vpm_start_offset
,
131 .number_of_consecutive_vertex_values_to_output_as_32_bit_values
= write_size
,
132 .output_buffer_to_write_to
= buffer
,
136 assert(unpacked
.first_shaded_vertex_value_to_output
!= 8 ||
137 so
->num_tf_specs
!= 0);
139 assert(so
->num_tf_specs
!= ARRAY_SIZE(so
->tf_specs
));
140 V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL
,
141 (void *)&so
->tf_specs
[so
->num_tf_specs
],
144 /* If point size is being written by the shader, then
145 * all the VPM start offsets are shifted up by one.
146 * We won't know that until the variant is compiled,
149 unpacked
.first_shaded_vertex_value_to_output
++;
152 assert(unpacked
.first_shaded_vertex_value_to_output
!= 8 ||
153 so
->num_tf_specs
!= 0);
155 V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL
,
156 (void *)&so
->tf_specs_psiz
[so
->num_tf_specs
],
159 vpm_start_offset
+= write_size
;
160 vpm_size
-= write_size
;
162 so
->base
.stream_output
.stride
[buffer
] =
163 stream_output
->stride
[buffer
];
166 so
->num_tf_outputs
= slot_count
;
167 so
->tf_outputs
= ralloc_array(so
->base
.ir
.nir
, struct v3d_varying_slot
,
169 memcpy(so
->tf_outputs
, slots
, sizeof(*slots
) * slot_count
);
/* nir_lower_io type-size callback: one vec4 slot per attribute slot. */
static int
type_size(const struct glsl_type *type, bool bindless)
{
        return glsl_count_attribute_slots(type, false);
}
179 * Precompiles a shader variant at shader state creation time if
180 * V3D_DEBUG=precompile is set. Used for shader-db
181 * (https://gitlab.freedesktop.org/mesa/shader-db)
184 v3d_shader_precompile(struct v3d_context
*v3d
,
185 struct v3d_uncompiled_shader
*so
)
187 nir_shader
*s
= so
->base
.ir
.nir
;
189 if (s
->info
.stage
== MESA_SHADER_FRAGMENT
) {
190 struct v3d_fs_key key
= {
191 .base
.shader_state
= so
,
194 nir_foreach_variable(var
, &s
->outputs
) {
195 if (var
->data
.location
== FRAG_RESULT_COLOR
) {
197 } else if (var
->data
.location
>= FRAG_RESULT_DATA0
) {
198 key
.cbufs
|= 1 << (var
->data
.location
-
203 key
.logicop_func
= PIPE_LOGICOP_COPY
;
205 v3d_setup_shared_precompile_key(so
, &key
.base
);
206 v3d_get_compiled_shader(v3d
, &key
.base
, sizeof(key
));
208 /* FIXME: add geometry shaders */
210 struct v3d_vs_key key
= {
211 .base
.shader_state
= so
,
212 /* Emit fixed function outputs */
213 .base
.is_last_geometry_stage
= true,
216 v3d_setup_shared_precompile_key(so
, &key
.base
);
218 /* Compile VS: All outputs */
219 nir_foreach_variable(var
, &s
->outputs
) {
220 unsigned array_len
= MAX2(glsl_get_length(var
->type
), 1);
221 assert(array_len
== 1);
224 int slot
= var
->data
.location
;
225 for (int i
= 0; i
< glsl_get_components(var
->type
); i
++) {
226 int swiz
= var
->data
.location_frac
+ i
;
227 key
.used_outputs
[key
.num_used_outputs
++] =
228 v3d_slot_from_slot_and_component(slot
,
233 v3d_get_compiled_shader(v3d
, &key
.base
, sizeof(key
));
235 /* Compile VS bin shader: only position (XXX: include TF) */
237 key
.num_used_outputs
= 0;
238 for (int i
= 0; i
< 4; i
++) {
239 key
.used_outputs
[key
.num_used_outputs
++] =
240 v3d_slot_from_slot_and_component(VARYING_SLOT_POS
,
243 v3d_get_compiled_shader(v3d
, &key
.base
, sizeof(key
));
248 v3d_uncompiled_shader_create(struct pipe_context
*pctx
,
249 enum pipe_shader_ir type
, void *ir
)
251 struct v3d_context
*v3d
= v3d_context(pctx
);
252 struct v3d_uncompiled_shader
*so
= CALLOC_STRUCT(v3d_uncompiled_shader
);
256 so
->program_id
= v3d
->next_uncompiled_program_id
++;
260 if (type
== PIPE_SHADER_IR_NIR
) {
261 /* The backend takes ownership of the NIR shader on state
266 assert(type
== PIPE_SHADER_IR_TGSI
);
268 if (V3D_DEBUG
& V3D_DEBUG_TGSI
) {
269 fprintf(stderr
, "prog %d TGSI:\n",
272 fprintf(stderr
, "\n");
274 s
= tgsi_to_nir(ir
, pctx
->screen
);
277 nir_variable_mode lower_mode
= nir_var_all
& ~nir_var_uniform
;
278 if (s
->info
.stage
== MESA_SHADER_VERTEX
||
279 s
->info
.stage
== MESA_SHADER_GEOMETRY
) {
280 lower_mode
&= ~(nir_var_shader_in
| nir_var_shader_out
);
282 NIR_PASS_V(s
, nir_lower_io
, lower_mode
,
284 (nir_lower_io_options
)0);
286 NIR_PASS_V(s
, nir_lower_regs_to_ssa
);
287 NIR_PASS_V(s
, nir_normalize_cubemap_coords
);
289 NIR_PASS_V(s
, nir_lower_load_const_to_scalar
);
293 NIR_PASS_V(s
, nir_remove_dead_variables
, nir_var_function_temp
);
295 /* Garbage collect dead instructions */
298 so
->base
.type
= PIPE_SHADER_IR_NIR
;
301 if (V3D_DEBUG
& (V3D_DEBUG_NIR
|
302 v3d_debug_flag_for_shader_stage(s
->info
.stage
))) {
303 fprintf(stderr
, "%s prog %d NIR:\n",
304 gl_shader_stage_name(s
->info
.stage
),
306 nir_print_shader(s
, stderr
);
307 fprintf(stderr
, "\n");
310 if (V3D_DEBUG
& V3D_DEBUG_PRECOMPILE
)
311 v3d_shader_precompile(v3d
, so
);
317 v3d_shader_debug_output(const char *message
, void *data
)
319 struct v3d_context
*v3d
= data
;
321 pipe_debug_message(&v3d
->debug
, SHADER_INFO
, "%s", message
);
325 v3d_shader_state_create(struct pipe_context
*pctx
,
326 const struct pipe_shader_state
*cso
)
328 struct v3d_uncompiled_shader
*so
=
329 v3d_uncompiled_shader_create(pctx
,
331 (cso
->type
== PIPE_SHADER_IR_TGSI
?
332 (void *)cso
->tokens
:
335 v3d_set_transform_feedback_outputs(so
, &cso
->stream_output
);
340 struct v3d_compiled_shader
*
341 v3d_get_compiled_shader(struct v3d_context
*v3d
,
345 struct v3d_uncompiled_shader
*shader_state
= key
->shader_state
;
346 nir_shader
*s
= shader_state
->base
.ir
.nir
;
348 struct hash_table
*ht
= v3d
->prog
.cache
[s
->info
.stage
];
349 struct hash_entry
*entry
= _mesa_hash_table_search(ht
, key
);
353 struct v3d_compiled_shader
*shader
=
354 rzalloc(NULL
, struct v3d_compiled_shader
);
356 int program_id
= shader_state
->program_id
;
358 p_atomic_inc_return(&shader_state
->compiled_variant_count
);
360 uint32_t shader_size
;
362 qpu_insts
= v3d_compile(v3d
->screen
->compiler
, key
,
363 &shader
->prog_data
.base
, s
,
364 v3d_shader_debug_output
,
366 program_id
, variant_id
, &shader_size
);
367 ralloc_steal(shader
, shader
->prog_data
.base
);
369 v3d_set_shader_uniform_dirty_flags(shader
);
372 u_upload_data(v3d
->state_uploader
, 0, shader_size
, 8,
373 qpu_insts
, &shader
->offset
, &shader
->resource
);
379 struct v3d_key
*dup_key
;
380 dup_key
= ralloc_size(shader
, key_size
);
381 memcpy(dup_key
, key
, key_size
);
382 _mesa_hash_table_insert(ht
, dup_key
, shader
);
385 if (shader
->prog_data
.base
->spill_size
>
386 v3d
->prog
.spill_size_per_thread
) {
387 /* The TIDX register we use for choosing the area to access
388 * for scratch space is: (core << 6) | (qpu << 2) | thread.
389 * Even at minimum threadcount in a particular shader, that
390 * means we still multiply by qpus by 4.
392 int total_spill_size
= (v3d
->screen
->devinfo
.qpu_count
* 4 *
393 shader
->prog_data
.base
->spill_size
);
395 v3d_bo_unreference(&v3d
->prog
.spill_bo
);
396 v3d
->prog
.spill_bo
= v3d_bo_alloc(v3d
->screen
,
397 total_spill_size
, "spill");
398 v3d
->prog
.spill_size_per_thread
=
399 shader
->prog_data
.base
->spill_size
;
406 v3d_free_compiled_shader(struct v3d_compiled_shader
*shader
)
408 pipe_resource_reference(&shader
->resource
, NULL
);
413 v3d_setup_shared_key(struct v3d_context
*v3d
, struct v3d_key
*key
,
414 struct v3d_texture_stateobj
*texstate
)
416 const struct v3d_device_info
*devinfo
= &v3d
->screen
->devinfo
;
418 for (int i
= 0; i
< texstate
->num_textures
; i
++) {
419 struct pipe_sampler_view
*sampler
= texstate
->textures
[i
];
420 struct v3d_sampler_view
*v3d_sampler
= v3d_sampler_view(sampler
);
421 struct pipe_sampler_state
*sampler_state
=
422 texstate
->samplers
[i
];
427 key
->tex
[i
].return_size
=
428 v3d_get_tex_return_size(devinfo
,
430 sampler_state
->compare_mode
);
432 /* For 16-bit, we set up the sampler to always return 2
433 * channels (meaning no recompiles for most statechanges),
434 * while for 32 we actually scale the returns with channels.
436 if (key
->tex
[i
].return_size
== 16) {
437 key
->tex
[i
].return_channels
= 2;
438 } else if (devinfo
->ver
> 40) {
439 key
->tex
[i
].return_channels
= 4;
441 key
->tex
[i
].return_channels
=
442 v3d_get_tex_return_channels(devinfo
,
446 if (key
->tex
[i
].return_size
== 32 && devinfo
->ver
< 40) {
447 memcpy(key
->tex
[i
].swizzle
,
448 v3d_sampler
->swizzle
,
449 sizeof(v3d_sampler
->swizzle
));
451 /* For 16-bit returns, we let the sampler state handle
454 key
->tex
[i
].swizzle
[0] = PIPE_SWIZZLE_X
;
455 key
->tex
[i
].swizzle
[1] = PIPE_SWIZZLE_Y
;
456 key
->tex
[i
].swizzle
[2] = PIPE_SWIZZLE_Z
;
457 key
->tex
[i
].swizzle
[3] = PIPE_SWIZZLE_W
;
461 key
->tex
[i
].clamp_s
=
462 sampler_state
->wrap_s
== PIPE_TEX_WRAP_CLAMP
;
463 key
->tex
[i
].clamp_t
=
464 sampler_state
->wrap_t
== PIPE_TEX_WRAP_CLAMP
;
465 key
->tex
[i
].clamp_r
=
466 sampler_state
->wrap_r
== PIPE_TEX_WRAP_CLAMP
;
472 v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader
*uncompiled
,
475 nir_shader
*s
= uncompiled
->base
.ir
.nir
;
477 for (int i
= 0; i
< s
->info
.num_textures
; i
++) {
478 key
->tex
[i
].return_size
= 16;
479 key
->tex
[i
].return_channels
= 2;
481 key
->tex
[i
].swizzle
[0] = PIPE_SWIZZLE_X
;
482 key
->tex
[i
].swizzle
[1] = PIPE_SWIZZLE_Y
;
483 key
->tex
[i
].swizzle
[2] = PIPE_SWIZZLE_Z
;
484 key
->tex
[i
].swizzle
[3] = PIPE_SWIZZLE_W
;
489 v3d_update_compiled_fs(struct v3d_context
*v3d
, uint8_t prim_mode
)
491 struct v3d_job
*job
= v3d
->job
;
492 struct v3d_fs_key local_key
;
493 struct v3d_fs_key
*key
= &local_key
;
494 nir_shader
*s
= v3d
->prog
.bind_fs
->base
.ir
.nir
;
496 if (!(v3d
->dirty
& (VC5_DIRTY_PRIM_MODE
|
498 VC5_DIRTY_FRAMEBUFFER
|
500 VC5_DIRTY_RASTERIZER
|
501 VC5_DIRTY_SAMPLE_STATE
|
503 VC5_DIRTY_UNCOMPILED_FS
))) {
507 memset(key
, 0, sizeof(*key
));
508 v3d_setup_shared_key(v3d
, &key
->base
, &v3d
->tex
[PIPE_SHADER_FRAGMENT
]);
509 key
->base
.shader_state
= v3d
->prog
.bind_fs
;
510 key
->base
.ucp_enables
= v3d
->rasterizer
->base
.clip_plane_enable
;
511 key
->is_points
= (prim_mode
== PIPE_PRIM_POINTS
);
512 key
->is_lines
= (prim_mode
>= PIPE_PRIM_LINES
&&
513 prim_mode
<= PIPE_PRIM_LINE_STRIP
);
514 key
->clamp_color
= v3d
->rasterizer
->base
.clamp_fragment_color
;
515 if (v3d
->blend
->base
.logicop_enable
) {
516 key
->logicop_func
= v3d
->blend
->base
.logicop_func
;
518 key
->logicop_func
= PIPE_LOGICOP_COPY
;
521 key
->msaa
= v3d
->rasterizer
->base
.multisample
;
522 key
->sample_coverage
= (v3d
->rasterizer
->base
.multisample
&&
523 v3d
->sample_mask
!= (1 << V3D_MAX_SAMPLES
) - 1);
524 key
->sample_alpha_to_coverage
= v3d
->blend
->base
.alpha_to_coverage
;
525 key
->sample_alpha_to_one
= v3d
->blend
->base
.alpha_to_one
;
528 key
->depth_enabled
= (v3d
->zsa
->base
.depth
.enabled
||
529 v3d
->zsa
->base
.stencil
[0].enabled
);
530 if (v3d
->zsa
->base
.alpha
.enabled
) {
531 key
->alpha_test
= true;
532 key
->alpha_test_func
= v3d
->zsa
->base
.alpha
.func
;
535 key
->swap_color_rb
= v3d
->swap_color_rb
;
537 for (int i
= 0; i
< v3d
->framebuffer
.nr_cbufs
; i
++) {
538 struct pipe_surface
*cbuf
= v3d
->framebuffer
.cbufs
[i
];
542 /* gl_FragColor's propagation to however many bound color
543 * buffers there are means that the shader compile needs to
544 * know what buffers are present.
546 key
->cbufs
|= 1 << i
;
548 /* If logic operations are enabled then we might emit color
549 * reads and we need to know the color buffer format and
552 if (key
->logicop_func
!= PIPE_LOGICOP_COPY
) {
553 key
->color_fmt
[i
].format
= cbuf
->format
;
554 key
->color_fmt
[i
].swizzle
=
555 v3d_get_format_swizzle(&v3d
->screen
->devinfo
,
559 const struct util_format_description
*desc
=
560 util_format_description(cbuf
->format
);
562 if (desc
->channel
[0].type
== UTIL_FORMAT_TYPE_FLOAT
&&
563 desc
->channel
[0].size
== 32) {
564 key
->f32_color_rb
|= 1 << i
;
567 if (s
->info
.fs
.untyped_color_outputs
) {
568 if (util_format_is_pure_uint(cbuf
->format
))
569 key
->uint_color_rb
|= 1 << i
;
570 else if (util_format_is_pure_sint(cbuf
->format
))
571 key
->int_color_rb
|= 1 << i
;
575 if (key
->is_points
) {
576 key
->point_sprite_mask
=
577 v3d
->rasterizer
->base
.sprite_coord_enable
;
578 key
->point_coord_upper_left
=
579 (v3d
->rasterizer
->base
.sprite_coord_mode
==
580 PIPE_SPRITE_COORD_UPPER_LEFT
);
583 key
->light_twoside
= v3d
->rasterizer
->base
.light_twoside
;
584 key
->shade_model_flat
= v3d
->rasterizer
->base
.flatshade
;
586 struct v3d_compiled_shader
*old_fs
= v3d
->prog
.fs
;
587 v3d
->prog
.fs
= v3d_get_compiled_shader(v3d
, &key
->base
, sizeof(*key
));
588 if (v3d
->prog
.fs
== old_fs
)
591 v3d
->dirty
|= VC5_DIRTY_COMPILED_FS
;
594 if (v3d
->prog
.fs
->prog_data
.fs
->flat_shade_flags
!=
595 old_fs
->prog_data
.fs
->flat_shade_flags
) {
596 v3d
->dirty
|= VC5_DIRTY_FLAT_SHADE_FLAGS
;
599 if (v3d
->prog
.fs
->prog_data
.fs
->noperspective_flags
!=
600 old_fs
->prog_data
.fs
->noperspective_flags
) {
601 v3d
->dirty
|= VC5_DIRTY_NOPERSPECTIVE_FLAGS
;
604 if (v3d
->prog
.fs
->prog_data
.fs
->centroid_flags
!=
605 old_fs
->prog_data
.fs
->centroid_flags
) {
606 v3d
->dirty
|= VC5_DIRTY_CENTROID_FLAGS
;
610 if (old_fs
&& memcmp(v3d
->prog
.fs
->prog_data
.fs
->input_slots
,
611 old_fs
->prog_data
.fs
->input_slots
,
612 sizeof(v3d
->prog
.fs
->prog_data
.fs
->input_slots
))) {
613 v3d
->dirty
|= VC5_DIRTY_FS_INPUTS
;
618 v3d_update_compiled_gs(struct v3d_context
*v3d
, uint8_t prim_mode
)
620 struct v3d_gs_key local_key
;
621 struct v3d_gs_key
*key
= &local_key
;
623 if (!(v3d
->dirty
& (VC5_DIRTY_GEOMTEX
|
624 VC5_DIRTY_RASTERIZER
|
625 VC5_DIRTY_UNCOMPILED_GS
|
626 VC5_DIRTY_PRIM_MODE
|
627 VC5_DIRTY_FS_INPUTS
))) {
631 if (!v3d
->prog
.bind_gs
) {
633 v3d
->prog
.gs_bin
= NULL
;
637 memset(key
, 0, sizeof(*key
));
638 v3d_setup_shared_key(v3d
, &key
->base
, &v3d
->tex
[PIPE_SHADER_GEOMETRY
]);
639 key
->base
.shader_state
= v3d
->prog
.bind_gs
;
640 key
->base
.ucp_enables
= v3d
->rasterizer
->base
.clip_plane_enable
;
641 key
->base
.is_last_geometry_stage
= true;
642 key
->num_used_outputs
= v3d
->prog
.fs
->prog_data
.fs
->num_inputs
;
643 STATIC_ASSERT(sizeof(key
->used_outputs
) ==
644 sizeof(v3d
->prog
.fs
->prog_data
.fs
->input_slots
));
645 memcpy(key
->used_outputs
, v3d
->prog
.fs
->prog_data
.fs
->input_slots
,
646 sizeof(key
->used_outputs
));
648 key
->per_vertex_point_size
=
649 (prim_mode
== PIPE_PRIM_POINTS
&&
650 v3d
->rasterizer
->base
.point_size_per_vertex
);
652 struct v3d_compiled_shader
*gs
=
653 v3d_get_compiled_shader(v3d
, &key
->base
, sizeof(*key
));
654 if (gs
!= v3d
->prog
.gs
) {
656 v3d
->dirty
|= VC5_DIRTY_COMPILED_GS
;
659 key
->is_coord
= true;
661 /* The last bin-mode shader in the geometry pipeline only outputs
662 * varyings used by transform feedback.
664 struct v3d_uncompiled_shader
*shader_state
= key
->base
.shader_state
;
665 memcpy(key
->used_outputs
, shader_state
->tf_outputs
,
666 sizeof(*key
->used_outputs
) * shader_state
->num_tf_outputs
);
667 if (shader_state
->num_tf_outputs
< key
->num_used_outputs
) {
668 uint32_t size
= sizeof(*key
->used_outputs
) *
669 (key
->num_used_outputs
-
670 shader_state
->num_tf_outputs
);
671 memset(&key
->used_outputs
[shader_state
->num_tf_outputs
],
674 key
->num_used_outputs
= shader_state
->num_tf_outputs
;
676 struct v3d_compiled_shader
*old_gs
= v3d
->prog
.gs
;
677 struct v3d_compiled_shader
*gs_bin
=
678 v3d_get_compiled_shader(v3d
, &key
->base
, sizeof(*key
));
679 if (gs_bin
!= old_gs
) {
680 v3d
->prog
.gs_bin
= gs_bin
;
681 v3d
->dirty
|= VC5_DIRTY_COMPILED_GS_BIN
;
684 if (old_gs
&& memcmp(v3d
->prog
.gs
->prog_data
.gs
->input_slots
,
685 old_gs
->prog_data
.gs
->input_slots
,
686 sizeof(v3d
->prog
.gs
->prog_data
.gs
->input_slots
))) {
687 v3d
->dirty
|= VC5_DIRTY_GS_INPUTS
;
692 v3d_update_compiled_vs(struct v3d_context
*v3d
, uint8_t prim_mode
)
694 struct v3d_vs_key local_key
;
695 struct v3d_vs_key
*key
= &local_key
;
697 if (!(v3d
->dirty
& (VC5_DIRTY_VERTTEX
|
699 VC5_DIRTY_UNCOMPILED_VS
|
700 (v3d
->prog
.bind_gs
? 0 : VC5_DIRTY_RASTERIZER
) |
701 (v3d
->prog
.bind_gs
? 0 : VC5_DIRTY_PRIM_MODE
) |
702 (v3d
->prog
.bind_gs
? VC5_DIRTY_GS_INPUTS
:
703 VC5_DIRTY_FS_INPUTS
)))) {
707 memset(key
, 0, sizeof(*key
));
708 v3d_setup_shared_key(v3d
, &key
->base
, &v3d
->tex
[PIPE_SHADER_VERTEX
]);
709 key
->base
.shader_state
= v3d
->prog
.bind_vs
;
710 key
->base
.ucp_enables
= v3d
->rasterizer
->base
.clip_plane_enable
;
711 key
->base
.is_last_geometry_stage
= !v3d
->prog
.bind_gs
;
713 if (!v3d
->prog
.bind_gs
) {
714 key
->num_used_outputs
= v3d
->prog
.fs
->prog_data
.fs
->num_inputs
;
715 STATIC_ASSERT(sizeof(key
->used_outputs
) ==
716 sizeof(v3d
->prog
.fs
->prog_data
.fs
->input_slots
));
717 memcpy(key
->used_outputs
, v3d
->prog
.fs
->prog_data
.fs
->input_slots
,
718 sizeof(key
->used_outputs
));
720 key
->num_used_outputs
= v3d
->prog
.gs
->prog_data
.gs
->num_inputs
;
721 STATIC_ASSERT(sizeof(key
->used_outputs
) ==
722 sizeof(v3d
->prog
.gs
->prog_data
.gs
->input_slots
));
723 memcpy(key
->used_outputs
, v3d
->prog
.gs
->prog_data
.gs
->input_slots
,
724 sizeof(key
->used_outputs
));
727 key
->clamp_color
= v3d
->rasterizer
->base
.clamp_vertex_color
;
729 key
->per_vertex_point_size
=
730 (prim_mode
== PIPE_PRIM_POINTS
&&
731 v3d
->rasterizer
->base
.point_size_per_vertex
);
733 struct v3d_compiled_shader
*vs
=
734 v3d_get_compiled_shader(v3d
, &key
->base
, sizeof(*key
));
735 if (vs
!= v3d
->prog
.vs
) {
737 v3d
->dirty
|= VC5_DIRTY_COMPILED_VS
;
740 key
->is_coord
= true;
742 /* Coord shaders only output varyings used by transform feedback,
743 * unless they are linked to other shaders in the geometry side
744 * of the pipeline, since in that case any of the output varyings
745 * could be required in later geometry stages to compute
746 * gl_Position or TF outputs.
748 if (!v3d
->prog
.bind_gs
) {
749 struct v3d_uncompiled_shader
*shader_state
=
750 key
->base
.shader_state
;
751 memcpy(key
->used_outputs
, shader_state
->tf_outputs
,
752 sizeof(*key
->used_outputs
) *
753 shader_state
->num_tf_outputs
);
754 if (shader_state
->num_tf_outputs
< key
->num_used_outputs
) {
755 uint32_t tail_bytes
=
756 sizeof(*key
->used_outputs
) *
757 (key
->num_used_outputs
-
758 shader_state
->num_tf_outputs
);
759 memset(&key
->used_outputs
[shader_state
->num_tf_outputs
],
762 key
->num_used_outputs
= shader_state
->num_tf_outputs
;
765 struct v3d_compiled_shader
*cs
=
766 v3d_get_compiled_shader(v3d
, &key
->base
, sizeof(*key
));
767 if (cs
!= v3d
->prog
.cs
) {
769 v3d
->dirty
|= VC5_DIRTY_COMPILED_CS
;
/* Updates all draw-pipeline shader variants for the current state.  FS goes
 * first since GS/VS keys depend on the FS's compiled input slots.
 */
void
v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode)
{
        v3d_update_compiled_fs(v3d, prim_mode);
        v3d_update_compiled_gs(v3d, prim_mode);
        v3d_update_compiled_vs(v3d, prim_mode);
}
782 v3d_update_compiled_cs(struct v3d_context
*v3d
)
784 struct v3d_key local_key
;
785 struct v3d_key
*key
= &local_key
;
787 if (!(v3d
->dirty
& (VC5_DIRTY_UNCOMPILED_CS
|
788 VC5_DIRTY_COMPTEX
))) {
792 memset(key
, 0, sizeof(*key
));
793 v3d_setup_shared_key(v3d
, key
, &v3d
->tex
[PIPE_SHADER_COMPUTE
]);
794 key
->shader_state
= v3d
->prog
.bind_compute
;
796 struct v3d_compiled_shader
*cs
=
797 v3d_get_compiled_shader(v3d
, key
, sizeof(*key
));
798 if (cs
!= v3d
->prog
.compute
) {
799 v3d
->prog
.compute
= cs
;
800 v3d
->dirty
|= VC5_DIRTY_COMPILED_CS
; /* XXX */
805 fs_cache_hash(const void *key
)
807 return _mesa_hash_data(key
, sizeof(struct v3d_fs_key
));
811 gs_cache_hash(const void *key
)
813 return _mesa_hash_data(key
, sizeof(struct v3d_gs_key
));
817 vs_cache_hash(const void *key
)
819 return _mesa_hash_data(key
, sizeof(struct v3d_vs_key
));
823 cs_cache_hash(const void *key
)
825 return _mesa_hash_data(key
, sizeof(struct v3d_key
));
829 fs_cache_compare(const void *key1
, const void *key2
)
831 return memcmp(key1
, key2
, sizeof(struct v3d_fs_key
)) == 0;
835 gs_cache_compare(const void *key1
, const void *key2
)
837 return memcmp(key1
, key2
, sizeof(struct v3d_gs_key
)) == 0;
841 vs_cache_compare(const void *key1
, const void *key2
)
843 return memcmp(key1
, key2
, sizeof(struct v3d_vs_key
)) == 0;
847 cs_cache_compare(const void *key1
, const void *key2
)
849 return memcmp(key1
, key2
, sizeof(struct v3d_key
)) == 0;
853 v3d_shader_state_delete(struct pipe_context
*pctx
, void *hwcso
)
855 struct v3d_context
*v3d
= v3d_context(pctx
);
856 struct v3d_uncompiled_shader
*so
= hwcso
;
857 nir_shader
*s
= so
->base
.ir
.nir
;
859 hash_table_foreach(v3d
->prog
.cache
[s
->info
.stage
], entry
) {
860 const struct v3d_key
*key
= entry
->key
;
861 struct v3d_compiled_shader
*shader
= entry
->data
;
863 if (key
->shader_state
!= so
)
866 if (v3d
->prog
.fs
== shader
)
868 if (v3d
->prog
.vs
== shader
)
870 if (v3d
->prog
.cs
== shader
)
872 if (v3d
->prog
.compute
== shader
)
873 v3d
->prog
.compute
= NULL
;
875 _mesa_hash_table_remove(v3d
->prog
.cache
[s
->info
.stage
], entry
);
876 v3d_free_compiled_shader(shader
);
879 ralloc_free(so
->base
.ir
.nir
);
884 v3d_fp_state_bind(struct pipe_context
*pctx
, void *hwcso
)
886 struct v3d_context
*v3d
= v3d_context(pctx
);
887 v3d
->prog
.bind_fs
= hwcso
;
888 v3d
->dirty
|= VC5_DIRTY_UNCOMPILED_FS
;
892 v3d_gp_state_bind(struct pipe_context
*pctx
, void *hwcso
)
894 struct v3d_context
*v3d
= v3d_context(pctx
);
895 v3d
->prog
.bind_gs
= hwcso
;
896 v3d
->dirty
|= VC5_DIRTY_UNCOMPILED_GS
;
900 v3d_vp_state_bind(struct pipe_context
*pctx
, void *hwcso
)
902 struct v3d_context
*v3d
= v3d_context(pctx
);
903 v3d
->prog
.bind_vs
= hwcso
;
904 v3d
->dirty
|= VC5_DIRTY_UNCOMPILED_VS
;
908 v3d_compute_state_bind(struct pipe_context
*pctx
, void *state
)
910 struct v3d_context
*v3d
= v3d_context(pctx
);
912 v3d
->prog
.bind_compute
= state
;
913 v3d
->dirty
|= VC5_DIRTY_UNCOMPILED_CS
;
917 v3d_create_compute_state(struct pipe_context
*pctx
,
918 const struct pipe_compute_state
*cso
)
920 return v3d_uncompiled_shader_create(pctx
, cso
->ir_type
,
925 v3d_program_init(struct pipe_context
*pctx
)
927 struct v3d_context
*v3d
= v3d_context(pctx
);
929 pctx
->create_vs_state
= v3d_shader_state_create
;
930 pctx
->delete_vs_state
= v3d_shader_state_delete
;
932 pctx
->create_gs_state
= v3d_shader_state_create
;
933 pctx
->delete_gs_state
= v3d_shader_state_delete
;
935 pctx
->create_fs_state
= v3d_shader_state_create
;
936 pctx
->delete_fs_state
= v3d_shader_state_delete
;
938 pctx
->bind_fs_state
= v3d_fp_state_bind
;
939 pctx
->bind_gs_state
= v3d_gp_state_bind
;
940 pctx
->bind_vs_state
= v3d_vp_state_bind
;
942 if (v3d
->screen
->has_csd
) {
943 pctx
->create_compute_state
= v3d_create_compute_state
;
944 pctx
->delete_compute_state
= v3d_shader_state_delete
;
945 pctx
->bind_compute_state
= v3d_compute_state_bind
;
948 v3d
->prog
.cache
[MESA_SHADER_VERTEX
] =
949 _mesa_hash_table_create(pctx
, vs_cache_hash
, vs_cache_compare
);
950 v3d
->prog
.cache
[MESA_SHADER_GEOMETRY
] =
951 _mesa_hash_table_create(pctx
, gs_cache_hash
, gs_cache_compare
);
952 v3d
->prog
.cache
[MESA_SHADER_FRAGMENT
] =
953 _mesa_hash_table_create(pctx
, fs_cache_hash
, fs_cache_compare
);
954 v3d
->prog
.cache
[MESA_SHADER_COMPUTE
] =
955 _mesa_hash_table_create(pctx
, cs_cache_hash
, cs_cache_compare
);
959 v3d_program_fini(struct pipe_context
*pctx
)
961 struct v3d_context
*v3d
= v3d_context(pctx
);
963 for (int i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
964 struct hash_table
*cache
= v3d
->prog
.cache
[i
];
968 hash_table_foreach(cache
, entry
) {
969 struct v3d_compiled_shader
*shader
= entry
->data
;
970 v3d_free_compiled_shader(shader
);
971 _mesa_hash_table_remove(cache
, entry
);
975 v3d_bo_unreference(&v3d
->prog
.spill_bo
);