/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

25 #include "util/format/u_format.h"
26 #include "util/u_math.h"
27 #include "util/u_memory.h"
28 #include "util/ralloc.h"
29 #include "util/hash_table.h"
30 #include "util/u_upload_mgr.h"
31 #include "tgsi/tgsi_dump.h"
32 #include "tgsi/tgsi_parse.h"
33 #include "compiler/nir/nir.h"
34 #include "compiler/nir/nir_builder.h"
35 #include "nir/tgsi_to_nir.h"
36 #include "compiler/v3d_compiler.h"
37 #include "v3d_context.h"
38 #include "broadcom/cle/v3d_packet_v33_pack.h"
/* Prototypes for helpers that are used before their definitions below. */
static struct v3d_compiled_shader *
v3d_get_compiled_shader(struct v3d_context *v3d,
                        struct v3d_key *key, size_t key_size);

static void
v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader *uncompiled,
                                struct v3d_key *key);

47 static gl_varying_slot
48 v3d_get_slot_for_driver_location(nir_shader
*s
, uint32_t driver_location
)
50 nir_foreach_shader_out_variable(var
, s
) {
51 if (var
->data
.driver_location
== driver_location
) {
52 return var
->data
.location
;
60 * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader.
62 * A shader can have 16 of these specs, and each one of them can write up to
63 * 16 dwords. Since we allow a total of 64 transform feedback output
64 * components (not 16 vectors), we have to group the writes of multiple
65 * varyings together in a single data spec.
68 v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader
*so
,
69 const struct pipe_stream_output_info
*stream_output
)
71 if (!stream_output
->num_outputs
)
74 struct v3d_varying_slot slots
[PIPE_MAX_SO_OUTPUTS
* 4];
77 for (int buffer
= 0; buffer
< PIPE_MAX_SO_BUFFERS
; buffer
++) {
78 uint32_t buffer_offset
= 0;
79 uint32_t vpm_start
= slot_count
;
81 for (int i
= 0; i
< stream_output
->num_outputs
; i
++) {
82 const struct pipe_stream_output
*output
=
83 &stream_output
->output
[i
];
85 if (output
->output_buffer
!= buffer
)
88 /* We assume that the SO outputs appear in increasing
89 * order in the buffer.
91 assert(output
->dst_offset
>= buffer_offset
);
93 /* Pad any undefined slots in the output */
94 for (int j
= buffer_offset
; j
< output
->dst_offset
; j
++) {
96 v3d_slot_from_slot_and_component(VARYING_SLOT_POS
, 0);
101 /* Set the coordinate shader up to output the
102 * components of this varying.
104 for (int j
= 0; j
< output
->num_components
; j
++) {
105 gl_varying_slot slot
=
106 v3d_get_slot_for_driver_location(so
->base
.ir
.nir
, output
->register_index
);
109 v3d_slot_from_slot_and_component(slot
,
110 output
->start_component
+ j
);
116 uint32_t vpm_size
= slot_count
- vpm_start
;
120 uint32_t vpm_start_offset
= vpm_start
+ 6;
123 uint32_t write_size
= MIN2(vpm_size
, 1 << 4);
125 struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked
= {
126 /* We need the offset from the coordinate shader's VPM
127 * output block, which has the [X, Y, Z, W, Xs, Ys]
128 * values at the start.
130 .first_shaded_vertex_value_to_output
= vpm_start_offset
,
131 .number_of_consecutive_vertex_values_to_output_as_32_bit_values
= write_size
,
132 .output_buffer_to_write_to
= buffer
,
136 assert(unpacked
.first_shaded_vertex_value_to_output
!= 8 ||
137 so
->num_tf_specs
!= 0);
139 assert(so
->num_tf_specs
!= ARRAY_SIZE(so
->tf_specs
));
140 V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL
,
141 (void *)&so
->tf_specs
[so
->num_tf_specs
],
144 /* If point size is being written by the shader, then
145 * all the VPM start offsets are shifted up by one.
146 * We won't know that until the variant is compiled,
149 unpacked
.first_shaded_vertex_value_to_output
++;
152 assert(unpacked
.first_shaded_vertex_value_to_output
!= 8 ||
153 so
->num_tf_specs
!= 0);
155 V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL
,
156 (void *)&so
->tf_specs_psiz
[so
->num_tf_specs
],
159 vpm_start_offset
+= write_size
;
160 vpm_size
-= write_size
;
162 so
->base
.stream_output
.stride
[buffer
] =
163 stream_output
->stride
[buffer
];
166 so
->num_tf_outputs
= slot_count
;
167 so
->tf_outputs
= ralloc_array(so
->base
.ir
.nir
, struct v3d_varying_slot
,
169 memcpy(so
->tf_outputs
, slots
, sizeof(*slots
) * slot_count
);
/**
 * Type-size callback passed to nir_lower_io: returns the number of attribute
 * slots @type occupies.  The @bindless argument is not used.
 */
static int
type_size(const struct glsl_type *type, bool bindless)
{
        (void)bindless;

        return glsl_count_attribute_slots(type, false);
}

179 precompile_all_outputs(nir_shader
*s
,
180 struct v3d_varying_slot
*outputs
,
181 uint8_t *num_outputs
)
183 nir_foreach_shader_out_variable(var
, s
) {
184 const int array_len
= MAX2(glsl_get_length(var
->type
), 1);
185 for (int j
= 0; j
< array_len
; j
++) {
186 const int slot
= var
->data
.location
+ j
;
187 const int num_components
=
188 glsl_get_components(var
->type
);
189 for (int i
= 0; i
< num_components
; i
++) {
190 const int swiz
= var
->data
.location_frac
+ i
;
191 outputs
[(*num_outputs
)++] =
192 v3d_slot_from_slot_and_component(slot
,
200 * Precompiles a shader variant at shader state creation time if
201 * V3D_DEBUG=precompile is set. Used for shader-db
202 * (https://gitlab.freedesktop.org/mesa/shader-db)
205 v3d_shader_precompile(struct v3d_context
*v3d
,
206 struct v3d_uncompiled_shader
*so
)
208 nir_shader
*s
= so
->base
.ir
.nir
;
210 if (s
->info
.stage
== MESA_SHADER_FRAGMENT
) {
211 struct v3d_fs_key key
= {
212 .base
.shader_state
= so
,
215 nir_foreach_shader_out_variable(var
, s
) {
216 if (var
->data
.location
== FRAG_RESULT_COLOR
) {
218 } else if (var
->data
.location
>= FRAG_RESULT_DATA0
) {
219 key
.cbufs
|= 1 << (var
->data
.location
-
224 key
.logicop_func
= PIPE_LOGICOP_COPY
;
226 v3d_setup_shared_precompile_key(so
, &key
.base
);
227 v3d_get_compiled_shader(v3d
, &key
.base
, sizeof(key
));
228 } else if (s
->info
.stage
== MESA_SHADER_GEOMETRY
) {
229 struct v3d_gs_key key
= {
230 .base
.shader_state
= so
,
231 .base
.is_last_geometry_stage
= true,
234 v3d_setup_shared_precompile_key(so
, &key
.base
);
236 precompile_all_outputs(s
,
238 &key
.num_used_outputs
);
240 v3d_get_compiled_shader(v3d
, &key
.base
, sizeof(key
));
242 /* Compile GS bin shader: only position (XXX: include TF) */
244 key
.num_used_outputs
= 0;
245 for (int i
= 0; i
< 4; i
++) {
246 key
.used_outputs
[key
.num_used_outputs
++] =
247 v3d_slot_from_slot_and_component(VARYING_SLOT_POS
,
250 v3d_get_compiled_shader(v3d
, &key
.base
, sizeof(key
));
252 assert(s
->info
.stage
== MESA_SHADER_VERTEX
);
253 struct v3d_vs_key key
= {
254 .base
.shader_state
= so
,
255 /* Emit fixed function outputs */
256 .base
.is_last_geometry_stage
= true,
259 v3d_setup_shared_precompile_key(so
, &key
.base
);
261 precompile_all_outputs(s
,
263 &key
.num_used_outputs
);
265 v3d_get_compiled_shader(v3d
, &key
.base
, sizeof(key
));
267 /* Compile VS bin shader: only position (XXX: include TF) */
269 key
.num_used_outputs
= 0;
270 for (int i
= 0; i
< 4; i
++) {
271 key
.used_outputs
[key
.num_used_outputs
++] =
272 v3d_slot_from_slot_and_component(VARYING_SLOT_POS
,
275 v3d_get_compiled_shader(v3d
, &key
.base
, sizeof(key
));
280 v3d_uncompiled_shader_create(struct pipe_context
*pctx
,
281 enum pipe_shader_ir type
, void *ir
)
283 struct v3d_context
*v3d
= v3d_context(pctx
);
284 struct v3d_uncompiled_shader
*so
= CALLOC_STRUCT(v3d_uncompiled_shader
);
288 so
->program_id
= v3d
->next_uncompiled_program_id
++;
292 if (type
== PIPE_SHADER_IR_NIR
) {
293 /* The backend takes ownership of the NIR shader on state
298 assert(type
== PIPE_SHADER_IR_TGSI
);
300 if (V3D_DEBUG
& V3D_DEBUG_TGSI
) {
301 fprintf(stderr
, "prog %d TGSI:\n",
304 fprintf(stderr
, "\n");
306 s
= tgsi_to_nir(ir
, pctx
->screen
, false);
309 if (s
->info
.stage
!= MESA_SHADER_VERTEX
&&
310 s
->info
.stage
!= MESA_SHADER_GEOMETRY
) {
311 NIR_PASS_V(s
, nir_lower_io
,
312 nir_var_shader_in
| nir_var_shader_out
,
313 type_size
, (nir_lower_io_options
)0);
316 NIR_PASS_V(s
, nir_lower_regs_to_ssa
);
317 NIR_PASS_V(s
, nir_normalize_cubemap_coords
);
319 NIR_PASS_V(s
, nir_lower_load_const_to_scalar
);
323 NIR_PASS_V(s
, nir_remove_dead_variables
, nir_var_function_temp
, NULL
);
325 /* Garbage collect dead instructions */
328 so
->base
.type
= PIPE_SHADER_IR_NIR
;
331 if (V3D_DEBUG
& (V3D_DEBUG_NIR
|
332 v3d_debug_flag_for_shader_stage(s
->info
.stage
))) {
333 fprintf(stderr
, "%s prog %d NIR:\n",
334 gl_shader_stage_name(s
->info
.stage
),
336 nir_print_shader(s
, stderr
);
337 fprintf(stderr
, "\n");
340 if (V3D_DEBUG
& V3D_DEBUG_PRECOMPILE
)
341 v3d_shader_precompile(v3d
, so
);
347 v3d_shader_debug_output(const char *message
, void *data
)
349 struct v3d_context
*v3d
= data
;
351 pipe_debug_message(&v3d
->debug
, SHADER_INFO
, "%s", message
);
355 v3d_shader_state_create(struct pipe_context
*pctx
,
356 const struct pipe_shader_state
*cso
)
358 struct v3d_uncompiled_shader
*so
=
359 v3d_uncompiled_shader_create(pctx
,
361 (cso
->type
== PIPE_SHADER_IR_TGSI
?
362 (void *)cso
->tokens
:
365 v3d_set_transform_feedback_outputs(so
, &cso
->stream_output
);
370 struct v3d_compiled_shader
*
371 v3d_get_compiled_shader(struct v3d_context
*v3d
,
375 struct v3d_uncompiled_shader
*shader_state
= key
->shader_state
;
376 nir_shader
*s
= shader_state
->base
.ir
.nir
;
378 struct hash_table
*ht
= v3d
->prog
.cache
[s
->info
.stage
];
379 struct hash_entry
*entry
= _mesa_hash_table_search(ht
, key
);
383 struct v3d_compiled_shader
*shader
=
384 rzalloc(NULL
, struct v3d_compiled_shader
);
386 int program_id
= shader_state
->program_id
;
388 p_atomic_inc_return(&shader_state
->compiled_variant_count
);
390 uint32_t shader_size
;
392 qpu_insts
= v3d_compile(v3d
->screen
->compiler
, key
,
393 &shader
->prog_data
.base
, s
,
394 v3d_shader_debug_output
,
396 program_id
, variant_id
, &shader_size
);
397 ralloc_steal(shader
, shader
->prog_data
.base
);
399 v3d_set_shader_uniform_dirty_flags(shader
);
402 u_upload_data(v3d
->state_uploader
, 0, shader_size
, 8,
403 qpu_insts
, &shader
->offset
, &shader
->resource
);
409 struct v3d_key
*dup_key
;
410 dup_key
= ralloc_size(shader
, key_size
);
411 memcpy(dup_key
, key
, key_size
);
412 _mesa_hash_table_insert(ht
, dup_key
, shader
);
415 if (shader
->prog_data
.base
->spill_size
>
416 v3d
->prog
.spill_size_per_thread
) {
417 /* The TIDX register we use for choosing the area to access
418 * for scratch space is: (core << 6) | (qpu << 2) | thread.
419 * Even at minimum threadcount in a particular shader, that
420 * means we still multiply by qpus by 4.
422 int total_spill_size
= (v3d
->screen
->devinfo
.qpu_count
* 4 *
423 shader
->prog_data
.base
->spill_size
);
425 v3d_bo_unreference(&v3d
->prog
.spill_bo
);
426 v3d
->prog
.spill_bo
= v3d_bo_alloc(v3d
->screen
,
427 total_spill_size
, "spill");
428 v3d
->prog
.spill_size_per_thread
=
429 shader
->prog_data
.base
->spill_size
;
436 v3d_free_compiled_shader(struct v3d_compiled_shader
*shader
)
438 pipe_resource_reference(&shader
->resource
, NULL
);
443 v3d_setup_shared_key(struct v3d_context
*v3d
, struct v3d_key
*key
,
444 struct v3d_texture_stateobj
*texstate
)
446 const struct v3d_device_info
*devinfo
= &v3d
->screen
->devinfo
;
448 for (int i
= 0; i
< texstate
->num_textures
; i
++) {
449 struct pipe_sampler_view
*sampler
= texstate
->textures
[i
];
450 struct v3d_sampler_view
*v3d_sampler
= v3d_sampler_view(sampler
);
451 struct pipe_sampler_state
*sampler_state
=
452 texstate
->samplers
[i
];
457 key
->tex
[i
].return_size
=
458 v3d_get_tex_return_size(devinfo
,
460 sampler_state
->compare_mode
);
462 /* For 16-bit, we set up the sampler to always return 2
463 * channels (meaning no recompiles for most statechanges),
464 * while for 32 we actually scale the returns with channels.
466 if (key
->tex
[i
].return_size
== 16) {
467 key
->tex
[i
].return_channels
= 2;
468 } else if (devinfo
->ver
> 40) {
469 key
->tex
[i
].return_channels
= 4;
471 key
->tex
[i
].return_channels
=
472 v3d_get_tex_return_channels(devinfo
,
476 if (key
->tex
[i
].return_size
== 32 && devinfo
->ver
< 40) {
477 memcpy(key
->tex
[i
].swizzle
,
478 v3d_sampler
->swizzle
,
479 sizeof(v3d_sampler
->swizzle
));
481 /* For 16-bit returns, we let the sampler state handle
484 key
->tex
[i
].swizzle
[0] = PIPE_SWIZZLE_X
;
485 key
->tex
[i
].swizzle
[1] = PIPE_SWIZZLE_Y
;
486 key
->tex
[i
].swizzle
[2] = PIPE_SWIZZLE_Z
;
487 key
->tex
[i
].swizzle
[3] = PIPE_SWIZZLE_W
;
491 key
->tex
[i
].clamp_s
=
492 sampler_state
->wrap_s
== PIPE_TEX_WRAP_CLAMP
;
493 key
->tex
[i
].clamp_t
=
494 sampler_state
->wrap_t
== PIPE_TEX_WRAP_CLAMP
;
495 key
->tex
[i
].clamp_r
=
496 sampler_state
->wrap_r
== PIPE_TEX_WRAP_CLAMP
;
502 v3d_setup_shared_precompile_key(struct v3d_uncompiled_shader
*uncompiled
,
505 nir_shader
*s
= uncompiled
->base
.ir
.nir
;
507 for (int i
= 0; i
< s
->info
.num_textures
; i
++) {
508 key
->tex
[i
].return_size
= 16;
509 key
->tex
[i
].return_channels
= 2;
511 key
->tex
[i
].swizzle
[0] = PIPE_SWIZZLE_X
;
512 key
->tex
[i
].swizzle
[1] = PIPE_SWIZZLE_Y
;
513 key
->tex
[i
].swizzle
[2] = PIPE_SWIZZLE_Z
;
514 key
->tex
[i
].swizzle
[3] = PIPE_SWIZZLE_W
;
519 v3d_update_compiled_fs(struct v3d_context
*v3d
, uint8_t prim_mode
)
521 struct v3d_job
*job
= v3d
->job
;
522 struct v3d_fs_key local_key
;
523 struct v3d_fs_key
*key
= &local_key
;
524 nir_shader
*s
= v3d
->prog
.bind_fs
->base
.ir
.nir
;
526 if (!(v3d
->dirty
& (VC5_DIRTY_PRIM_MODE
|
528 VC5_DIRTY_FRAMEBUFFER
|
530 VC5_DIRTY_RASTERIZER
|
531 VC5_DIRTY_SAMPLE_STATE
|
533 VC5_DIRTY_UNCOMPILED_FS
))) {
537 memset(key
, 0, sizeof(*key
));
538 v3d_setup_shared_key(v3d
, &key
->base
, &v3d
->tex
[PIPE_SHADER_FRAGMENT
]);
539 key
->base
.shader_state
= v3d
->prog
.bind_fs
;
540 key
->base
.ucp_enables
= v3d
->rasterizer
->base
.clip_plane_enable
;
541 key
->is_points
= (prim_mode
== PIPE_PRIM_POINTS
);
542 key
->is_lines
= (prim_mode
>= PIPE_PRIM_LINES
&&
543 prim_mode
<= PIPE_PRIM_LINE_STRIP
);
544 key
->line_smoothing
= (key
->is_lines
&&
545 v3d_line_smoothing_enabled(v3d
));
546 key
->clamp_color
= v3d
->rasterizer
->base
.clamp_fragment_color
;
547 if (v3d
->blend
->base
.logicop_enable
) {
548 key
->logicop_func
= v3d
->blend
->base
.logicop_func
;
550 key
->logicop_func
= PIPE_LOGICOP_COPY
;
553 key
->msaa
= v3d
->rasterizer
->base
.multisample
;
554 key
->sample_coverage
= (v3d
->rasterizer
->base
.multisample
&&
555 v3d
->sample_mask
!= (1 << V3D_MAX_SAMPLES
) - 1);
556 key
->sample_alpha_to_coverage
= v3d
->blend
->base
.alpha_to_coverage
;
557 key
->sample_alpha_to_one
= v3d
->blend
->base
.alpha_to_one
;
560 key
->depth_enabled
= (v3d
->zsa
->base
.depth
.enabled
||
561 v3d
->zsa
->base
.stencil
[0].enabled
);
562 if (v3d
->zsa
->base
.alpha
.enabled
) {
563 key
->alpha_test
= true;
564 key
->alpha_test_func
= v3d
->zsa
->base
.alpha
.func
;
567 key
->swap_color_rb
= v3d
->swap_color_rb
;
569 for (int i
= 0; i
< v3d
->framebuffer
.nr_cbufs
; i
++) {
570 struct pipe_surface
*cbuf
= v3d
->framebuffer
.cbufs
[i
];
574 /* gl_FragColor's propagation to however many bound color
575 * buffers there are means that the shader compile needs to
576 * know what buffers are present.
578 key
->cbufs
|= 1 << i
;
580 /* If logic operations are enabled then we might emit color
581 * reads and we need to know the color buffer format and
584 if (key
->logicop_func
!= PIPE_LOGICOP_COPY
) {
585 key
->color_fmt
[i
].format
= cbuf
->format
;
586 key
->color_fmt
[i
].swizzle
=
587 v3d_get_format_swizzle(&v3d
->screen
->devinfo
,
591 const struct util_format_description
*desc
=
592 util_format_description(cbuf
->format
);
594 if (desc
->channel
[0].type
== UTIL_FORMAT_TYPE_FLOAT
&&
595 desc
->channel
[0].size
== 32) {
596 key
->f32_color_rb
|= 1 << i
;
599 if (s
->info
.fs
.untyped_color_outputs
) {
600 if (util_format_is_pure_uint(cbuf
->format
))
601 key
->uint_color_rb
|= 1 << i
;
602 else if (util_format_is_pure_sint(cbuf
->format
))
603 key
->int_color_rb
|= 1 << i
;
607 if (key
->is_points
) {
608 key
->point_sprite_mask
=
609 v3d
->rasterizer
->base
.sprite_coord_enable
;
610 key
->point_coord_upper_left
=
611 (v3d
->rasterizer
->base
.sprite_coord_mode
==
612 PIPE_SPRITE_COORD_UPPER_LEFT
);
615 key
->light_twoside
= v3d
->rasterizer
->base
.light_twoside
;
616 key
->shade_model_flat
= v3d
->rasterizer
->base
.flatshade
;
618 struct v3d_compiled_shader
*old_fs
= v3d
->prog
.fs
;
619 v3d
->prog
.fs
= v3d_get_compiled_shader(v3d
, &key
->base
, sizeof(*key
));
620 if (v3d
->prog
.fs
== old_fs
)
623 v3d
->dirty
|= VC5_DIRTY_COMPILED_FS
;
626 if (v3d
->prog
.fs
->prog_data
.fs
->flat_shade_flags
!=
627 old_fs
->prog_data
.fs
->flat_shade_flags
) {
628 v3d
->dirty
|= VC5_DIRTY_FLAT_SHADE_FLAGS
;
631 if (v3d
->prog
.fs
->prog_data
.fs
->noperspective_flags
!=
632 old_fs
->prog_data
.fs
->noperspective_flags
) {
633 v3d
->dirty
|= VC5_DIRTY_NOPERSPECTIVE_FLAGS
;
636 if (v3d
->prog
.fs
->prog_data
.fs
->centroid_flags
!=
637 old_fs
->prog_data
.fs
->centroid_flags
) {
638 v3d
->dirty
|= VC5_DIRTY_CENTROID_FLAGS
;
642 if (old_fs
&& memcmp(v3d
->prog
.fs
->prog_data
.fs
->input_slots
,
643 old_fs
->prog_data
.fs
->input_slots
,
644 sizeof(v3d
->prog
.fs
->prog_data
.fs
->input_slots
))) {
645 v3d
->dirty
|= VC5_DIRTY_FS_INPUTS
;
650 v3d_update_compiled_gs(struct v3d_context
*v3d
, uint8_t prim_mode
)
652 struct v3d_gs_key local_key
;
653 struct v3d_gs_key
*key
= &local_key
;
655 if (!(v3d
->dirty
& (VC5_DIRTY_GEOMTEX
|
656 VC5_DIRTY_RASTERIZER
|
657 VC5_DIRTY_UNCOMPILED_GS
|
658 VC5_DIRTY_PRIM_MODE
|
659 VC5_DIRTY_FS_INPUTS
))) {
663 if (!v3d
->prog
.bind_gs
) {
665 v3d
->prog
.gs_bin
= NULL
;
669 memset(key
, 0, sizeof(*key
));
670 v3d_setup_shared_key(v3d
, &key
->base
, &v3d
->tex
[PIPE_SHADER_GEOMETRY
]);
671 key
->base
.shader_state
= v3d
->prog
.bind_gs
;
672 key
->base
.ucp_enables
= v3d
->rasterizer
->base
.clip_plane_enable
;
673 key
->base
.is_last_geometry_stage
= true;
674 key
->num_used_outputs
= v3d
->prog
.fs
->prog_data
.fs
->num_inputs
;
675 STATIC_ASSERT(sizeof(key
->used_outputs
) ==
676 sizeof(v3d
->prog
.fs
->prog_data
.fs
->input_slots
));
677 memcpy(key
->used_outputs
, v3d
->prog
.fs
->prog_data
.fs
->input_slots
,
678 sizeof(key
->used_outputs
));
680 key
->per_vertex_point_size
=
681 (prim_mode
== PIPE_PRIM_POINTS
&&
682 v3d
->rasterizer
->base
.point_size_per_vertex
);
684 struct v3d_compiled_shader
*gs
=
685 v3d_get_compiled_shader(v3d
, &key
->base
, sizeof(*key
));
686 if (gs
!= v3d
->prog
.gs
) {
688 v3d
->dirty
|= VC5_DIRTY_COMPILED_GS
;
691 key
->is_coord
= true;
693 /* The last bin-mode shader in the geometry pipeline only outputs
694 * varyings used by transform feedback.
696 struct v3d_uncompiled_shader
*shader_state
= key
->base
.shader_state
;
697 memcpy(key
->used_outputs
, shader_state
->tf_outputs
,
698 sizeof(*key
->used_outputs
) * shader_state
->num_tf_outputs
);
699 if (shader_state
->num_tf_outputs
< key
->num_used_outputs
) {
700 uint32_t size
= sizeof(*key
->used_outputs
) *
701 (key
->num_used_outputs
-
702 shader_state
->num_tf_outputs
);
703 memset(&key
->used_outputs
[shader_state
->num_tf_outputs
],
706 key
->num_used_outputs
= shader_state
->num_tf_outputs
;
708 struct v3d_compiled_shader
*old_gs
= v3d
->prog
.gs
;
709 struct v3d_compiled_shader
*gs_bin
=
710 v3d_get_compiled_shader(v3d
, &key
->base
, sizeof(*key
));
711 if (gs_bin
!= old_gs
) {
712 v3d
->prog
.gs_bin
= gs_bin
;
713 v3d
->dirty
|= VC5_DIRTY_COMPILED_GS_BIN
;
716 if (old_gs
&& memcmp(v3d
->prog
.gs
->prog_data
.gs
->input_slots
,
717 old_gs
->prog_data
.gs
->input_slots
,
718 sizeof(v3d
->prog
.gs
->prog_data
.gs
->input_slots
))) {
719 v3d
->dirty
|= VC5_DIRTY_GS_INPUTS
;
724 v3d_update_compiled_vs(struct v3d_context
*v3d
, uint8_t prim_mode
)
726 struct v3d_vs_key local_key
;
727 struct v3d_vs_key
*key
= &local_key
;
729 if (!(v3d
->dirty
& (VC5_DIRTY_VERTTEX
|
731 VC5_DIRTY_UNCOMPILED_VS
|
732 (v3d
->prog
.bind_gs
? 0 : VC5_DIRTY_RASTERIZER
) |
733 (v3d
->prog
.bind_gs
? 0 : VC5_DIRTY_PRIM_MODE
) |
734 (v3d
->prog
.bind_gs
? VC5_DIRTY_GS_INPUTS
:
735 VC5_DIRTY_FS_INPUTS
)))) {
739 memset(key
, 0, sizeof(*key
));
740 v3d_setup_shared_key(v3d
, &key
->base
, &v3d
->tex
[PIPE_SHADER_VERTEX
]);
741 key
->base
.shader_state
= v3d
->prog
.bind_vs
;
742 key
->base
.ucp_enables
= v3d
->rasterizer
->base
.clip_plane_enable
;
743 key
->base
.is_last_geometry_stage
= !v3d
->prog
.bind_gs
;
745 if (!v3d
->prog
.bind_gs
) {
746 key
->num_used_outputs
= v3d
->prog
.fs
->prog_data
.fs
->num_inputs
;
747 STATIC_ASSERT(sizeof(key
->used_outputs
) ==
748 sizeof(v3d
->prog
.fs
->prog_data
.fs
->input_slots
));
749 memcpy(key
->used_outputs
, v3d
->prog
.fs
->prog_data
.fs
->input_slots
,
750 sizeof(key
->used_outputs
));
752 key
->num_used_outputs
= v3d
->prog
.gs
->prog_data
.gs
->num_inputs
;
753 STATIC_ASSERT(sizeof(key
->used_outputs
) ==
754 sizeof(v3d
->prog
.gs
->prog_data
.gs
->input_slots
));
755 memcpy(key
->used_outputs
, v3d
->prog
.gs
->prog_data
.gs
->input_slots
,
756 sizeof(key
->used_outputs
));
759 key
->clamp_color
= v3d
->rasterizer
->base
.clamp_vertex_color
;
761 key
->per_vertex_point_size
=
762 (prim_mode
== PIPE_PRIM_POINTS
&&
763 v3d
->rasterizer
->base
.point_size_per_vertex
);
765 struct v3d_compiled_shader
*vs
=
766 v3d_get_compiled_shader(v3d
, &key
->base
, sizeof(*key
));
767 if (vs
!= v3d
->prog
.vs
) {
769 v3d
->dirty
|= VC5_DIRTY_COMPILED_VS
;
772 key
->is_coord
= true;
774 /* Coord shaders only output varyings used by transform feedback,
775 * unless they are linked to other shaders in the geometry side
776 * of the pipeline, since in that case any of the output varyings
777 * could be required in later geometry stages to compute
778 * gl_Position or TF outputs.
780 if (!v3d
->prog
.bind_gs
) {
781 struct v3d_uncompiled_shader
*shader_state
=
782 key
->base
.shader_state
;
783 memcpy(key
->used_outputs
, shader_state
->tf_outputs
,
784 sizeof(*key
->used_outputs
) *
785 shader_state
->num_tf_outputs
);
786 if (shader_state
->num_tf_outputs
< key
->num_used_outputs
) {
787 uint32_t tail_bytes
=
788 sizeof(*key
->used_outputs
) *
789 (key
->num_used_outputs
-
790 shader_state
->num_tf_outputs
);
791 memset(&key
->used_outputs
[shader_state
->num_tf_outputs
],
794 key
->num_used_outputs
= shader_state
->num_tf_outputs
;
797 struct v3d_compiled_shader
*cs
=
798 v3d_get_compiled_shader(v3d
, &key
->base
, sizeof(*key
));
799 if (cs
!= v3d
->prog
.cs
) {
801 v3d
->dirty
|= VC5_DIRTY_COMPILED_CS
;
/**
 * Refreshes the compiled variants of all graphics stages for a draw with
 * primitive type @prim_mode.
 *
 * The order matters: the geometry shader key reads the fragment shader's
 * inputs, and the vertex shader key reads the inputs of whichever stage
 * follows it, so consumers are updated before producers.
 */
void
v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode)
{
        v3d_update_compiled_fs(v3d, prim_mode);
        v3d_update_compiled_gs(v3d, prim_mode);
        v3d_update_compiled_vs(v3d, prim_mode);
}

814 v3d_update_compiled_cs(struct v3d_context
*v3d
)
816 struct v3d_key local_key
;
817 struct v3d_key
*key
= &local_key
;
819 if (!(v3d
->dirty
& (VC5_DIRTY_UNCOMPILED_CS
|
820 VC5_DIRTY_COMPTEX
))) {
824 memset(key
, 0, sizeof(*key
));
825 v3d_setup_shared_key(v3d
, key
, &v3d
->tex
[PIPE_SHADER_COMPUTE
]);
826 key
->shader_state
= v3d
->prog
.bind_compute
;
828 struct v3d_compiled_shader
*cs
=
829 v3d_get_compiled_shader(v3d
, key
, sizeof(*key
));
830 if (cs
!= v3d
->prog
.compute
) {
831 v3d
->prog
.compute
= cs
;
832 v3d
->dirty
|= VC5_DIRTY_COMPILED_CS
; /* XXX */
837 fs_cache_hash(const void *key
)
839 return _mesa_hash_data(key
, sizeof(struct v3d_fs_key
));
843 gs_cache_hash(const void *key
)
845 return _mesa_hash_data(key
, sizeof(struct v3d_gs_key
));
849 vs_cache_hash(const void *key
)
851 return _mesa_hash_data(key
, sizeof(struct v3d_vs_key
));
855 cs_cache_hash(const void *key
)
857 return _mesa_hash_data(key
, sizeof(struct v3d_key
));
861 fs_cache_compare(const void *key1
, const void *key2
)
863 return memcmp(key1
, key2
, sizeof(struct v3d_fs_key
)) == 0;
867 gs_cache_compare(const void *key1
, const void *key2
)
869 return memcmp(key1
, key2
, sizeof(struct v3d_gs_key
)) == 0;
873 vs_cache_compare(const void *key1
, const void *key2
)
875 return memcmp(key1
, key2
, sizeof(struct v3d_vs_key
)) == 0;
879 cs_cache_compare(const void *key1
, const void *key2
)
881 return memcmp(key1
, key2
, sizeof(struct v3d_key
)) == 0;
885 v3d_shader_state_delete(struct pipe_context
*pctx
, void *hwcso
)
887 struct v3d_context
*v3d
= v3d_context(pctx
);
888 struct v3d_uncompiled_shader
*so
= hwcso
;
889 nir_shader
*s
= so
->base
.ir
.nir
;
891 hash_table_foreach(v3d
->prog
.cache
[s
->info
.stage
], entry
) {
892 const struct v3d_key
*key
= entry
->key
;
893 struct v3d_compiled_shader
*shader
= entry
->data
;
895 if (key
->shader_state
!= so
)
898 if (v3d
->prog
.fs
== shader
)
900 if (v3d
->prog
.vs
== shader
)
902 if (v3d
->prog
.cs
== shader
)
904 if (v3d
->prog
.compute
== shader
)
905 v3d
->prog
.compute
= NULL
;
907 _mesa_hash_table_remove(v3d
->prog
.cache
[s
->info
.stage
], entry
);
908 v3d_free_compiled_shader(shader
);
911 ralloc_free(so
->base
.ir
.nir
);
916 v3d_fp_state_bind(struct pipe_context
*pctx
, void *hwcso
)
918 struct v3d_context
*v3d
= v3d_context(pctx
);
919 v3d
->prog
.bind_fs
= hwcso
;
920 v3d
->dirty
|= VC5_DIRTY_UNCOMPILED_FS
;
924 v3d_gp_state_bind(struct pipe_context
*pctx
, void *hwcso
)
926 struct v3d_context
*v3d
= v3d_context(pctx
);
927 v3d
->prog
.bind_gs
= hwcso
;
928 v3d
->dirty
|= VC5_DIRTY_UNCOMPILED_GS
;
932 v3d_vp_state_bind(struct pipe_context
*pctx
, void *hwcso
)
934 struct v3d_context
*v3d
= v3d_context(pctx
);
935 v3d
->prog
.bind_vs
= hwcso
;
936 v3d
->dirty
|= VC5_DIRTY_UNCOMPILED_VS
;
940 v3d_compute_state_bind(struct pipe_context
*pctx
, void *state
)
942 struct v3d_context
*v3d
= v3d_context(pctx
);
944 v3d
->prog
.bind_compute
= state
;
945 v3d
->dirty
|= VC5_DIRTY_UNCOMPILED_CS
;
949 v3d_create_compute_state(struct pipe_context
*pctx
,
950 const struct pipe_compute_state
*cso
)
952 return v3d_uncompiled_shader_create(pctx
, cso
->ir_type
,
957 v3d_program_init(struct pipe_context
*pctx
)
959 struct v3d_context
*v3d
= v3d_context(pctx
);
961 pctx
->create_vs_state
= v3d_shader_state_create
;
962 pctx
->delete_vs_state
= v3d_shader_state_delete
;
964 pctx
->create_gs_state
= v3d_shader_state_create
;
965 pctx
->delete_gs_state
= v3d_shader_state_delete
;
967 pctx
->create_fs_state
= v3d_shader_state_create
;
968 pctx
->delete_fs_state
= v3d_shader_state_delete
;
970 pctx
->bind_fs_state
= v3d_fp_state_bind
;
971 pctx
->bind_gs_state
= v3d_gp_state_bind
;
972 pctx
->bind_vs_state
= v3d_vp_state_bind
;
974 if (v3d
->screen
->has_csd
) {
975 pctx
->create_compute_state
= v3d_create_compute_state
;
976 pctx
->delete_compute_state
= v3d_shader_state_delete
;
977 pctx
->bind_compute_state
= v3d_compute_state_bind
;
980 v3d
->prog
.cache
[MESA_SHADER_VERTEX
] =
981 _mesa_hash_table_create(pctx
, vs_cache_hash
, vs_cache_compare
);
982 v3d
->prog
.cache
[MESA_SHADER_GEOMETRY
] =
983 _mesa_hash_table_create(pctx
, gs_cache_hash
, gs_cache_compare
);
984 v3d
->prog
.cache
[MESA_SHADER_FRAGMENT
] =
985 _mesa_hash_table_create(pctx
, fs_cache_hash
, fs_cache_compare
);
986 v3d
->prog
.cache
[MESA_SHADER_COMPUTE
] =
987 _mesa_hash_table_create(pctx
, cs_cache_hash
, cs_cache_compare
);
991 v3d_program_fini(struct pipe_context
*pctx
)
993 struct v3d_context
*v3d
= v3d_context(pctx
);
995 for (int i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
996 struct hash_table
*cache
= v3d
->prog
.cache
[i
];
1000 hash_table_foreach(cache
, entry
) {
1001 struct v3d_compiled_shader
*shader
= entry
->data
;
1002 v3d_free_compiled_shader(shader
);
1003 _mesa_hash_table_remove(cache
, entry
);
1007 v3d_bo_unreference(&v3d
->prog
.spill_bo
);