/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */
28 #include "genhw/genhw.h" /* for SBE setup */
29 #include "core/ilo_builder.h"
30 #include "core/ilo_state_3d.h"
31 #include "core/intel_winsys.h"
32 #include "shader/ilo_shader_internal.h"
33 #include "tgsi/tgsi_parse.h"
35 #include "ilo_state.h"
36 #include "ilo_shader.h"
38 struct ilo_shader_cache
{
39 struct list_head shaders
;
40 struct list_head changed
;
44 * Create a shader cache. A shader cache can manage shaders and upload them
47 struct ilo_shader_cache
*
48 ilo_shader_cache_create(void)
50 struct ilo_shader_cache
*shc
;
52 shc
= CALLOC_STRUCT(ilo_shader_cache
);
56 list_inithead(&shc
->shaders
);
57 list_inithead(&shc
->changed
);
/**
 * Destroy a shader cache.
 *
 * NOTE(review): the function body was missing from the extracted text.
 * Releasing the cache structure itself is reconstructed here; the shader
 * states on the lists are presumably owned by their creators -- confirm.
 */
void
ilo_shader_cache_destroy(struct ilo_shader_cache *shc)
{
   FREE(shc);
}
72 * Add a shader to the cache.
75 ilo_shader_cache_add(struct ilo_shader_cache
*shc
,
76 struct ilo_shader_state
*shader
)
78 struct ilo_shader
*sh
;
81 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
)
84 list_add(&shader
->list
, &shc
->changed
);
88 * Remove a shader from the cache.
91 ilo_shader_cache_remove(struct ilo_shader_cache
*shc
,
92 struct ilo_shader_state
*shader
)
94 list_del(&shader
->list
);
99 * Notify the cache that a managed shader has changed.
102 ilo_shader_cache_notify_change(struct ilo_shader_cache
*shc
,
103 struct ilo_shader_state
*shader
)
105 if (shader
->cache
== shc
) {
106 list_del(&shader
->list
);
107 list_add(&shader
->list
, &shc
->changed
);
112 * Upload managed shaders to the bo. Only shaders that are changed or added
113 * after the last upload are uploaded.
116 ilo_shader_cache_upload(struct ilo_shader_cache
*shc
,
117 struct ilo_builder
*builder
)
119 struct ilo_shader_state
*shader
, *next
;
121 LIST_FOR_EACH_ENTRY_SAFE(shader
, next
, &shc
->changed
, list
) {
122 struct ilo_shader
*sh
;
124 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
) {
128 sh
->cache_offset
= ilo_builder_instruction_write(builder
,
129 sh
->kernel_size
, sh
->kernel
);
134 list_del(&shader
->list
);
135 list_add(&shader
->list
, &shc
->shaders
);
140 * Invalidate all shaders so that they get uploaded in next
141 * ilo_shader_cache_upload().
144 ilo_shader_cache_invalidate(struct ilo_shader_cache
*shc
)
146 struct ilo_shader_state
*shader
, *next
;
148 LIST_FOR_EACH_ENTRY_SAFE(shader
, next
, &shc
->shaders
, list
) {
149 list_del(&shader
->list
);
150 list_add(&shader
->list
, &shc
->changed
);
153 LIST_FOR_EACH_ENTRY(shader
, &shc
->changed
, list
) {
154 struct ilo_shader
*sh
;
156 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
)
157 sh
->uploaded
= false;
162 * Initialize a shader variant.
165 ilo_shader_variant_init(struct ilo_shader_variant
*variant
,
166 const struct ilo_shader_info
*info
,
167 const struct ilo_state_vector
*vec
)
171 memset(variant
, 0, sizeof(*variant
));
173 switch (info
->type
) {
174 case PIPE_SHADER_VERTEX
:
175 variant
->u
.vs
.rasterizer_discard
=
176 vec
->rasterizer
->state
.rasterizer_discard
;
177 variant
->u
.vs
.num_ucps
=
178 util_last_bit(vec
->rasterizer
->state
.clip_plane_enable
);
180 case PIPE_SHADER_GEOMETRY
:
181 variant
->u
.gs
.rasterizer_discard
=
182 vec
->rasterizer
->state
.rasterizer_discard
;
183 variant
->u
.gs
.num_inputs
= vec
->vs
->shader
->out
.count
;
184 for (i
= 0; i
< vec
->vs
->shader
->out
.count
; i
++) {
185 variant
->u
.gs
.semantic_names
[i
] =
186 vec
->vs
->shader
->out
.semantic_names
[i
];
187 variant
->u
.gs
.semantic_indices
[i
] =
188 vec
->vs
->shader
->out
.semantic_indices
[i
];
191 case PIPE_SHADER_FRAGMENT
:
192 variant
->u
.fs
.flatshade
=
193 (info
->has_color_interp
&& vec
->rasterizer
->state
.flatshade
);
194 variant
->u
.fs
.fb_height
= (info
->has_pos
) ?
195 vec
->fb
.state
.height
: 1;
196 variant
->u
.fs
.num_cbufs
= vec
->fb
.state
.nr_cbufs
;
199 assert(!"unknown shader type");
203 /* use PCB unless constant buffer 0 is not in user buffer */
204 if ((vec
->cbuf
[info
->type
].enabled_mask
& 0x1) &&
205 !vec
->cbuf
[info
->type
].cso
[0].user_buffer
)
206 variant
->use_pcb
= false;
208 variant
->use_pcb
= true;
210 num_views
= vec
->view
[info
->type
].count
;
211 assert(info
->num_samplers
<= num_views
);
213 variant
->num_sampler_views
= info
->num_samplers
;
214 for (i
= 0; i
< info
->num_samplers
; i
++) {
215 const struct pipe_sampler_view
*view
= vec
->view
[info
->type
].states
[i
];
216 const struct ilo_sampler_cso
*sampler
= vec
->sampler
[info
->type
].cso
[i
];
219 variant
->sampler_view_swizzles
[i
].r
= view
->swizzle_r
;
220 variant
->sampler_view_swizzles
[i
].g
= view
->swizzle_g
;
221 variant
->sampler_view_swizzles
[i
].b
= view
->swizzle_b
;
222 variant
->sampler_view_swizzles
[i
].a
= view
->swizzle_a
;
224 else if (info
->shadow_samplers
& (1 << i
)) {
225 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
226 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_RED
;
227 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_RED
;
228 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ONE
;
231 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
232 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_GREEN
;
233 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_BLUE
;
234 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ALPHA
;
238 * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
239 * the HW wrap mode is set to GEN6_TEXCOORDMODE_CLAMP_BORDER, and we
240 * need to manually saturate the texture coordinates.
243 variant
->saturate_tex_coords
[0] |= sampler
->saturate_s
<< i
;
244 variant
->saturate_tex_coords
[1] |= sampler
->saturate_t
<< i
;
245 variant
->saturate_tex_coords
[2] |= sampler
->saturate_r
<< i
;
251 * Guess the shader variant, knowing that the context may still change.
254 ilo_shader_variant_guess(struct ilo_shader_variant
*variant
,
255 const struct ilo_shader_info
*info
,
256 const struct ilo_state_vector
*vec
)
260 memset(variant
, 0, sizeof(*variant
));
262 switch (info
->type
) {
263 case PIPE_SHADER_VERTEX
:
265 case PIPE_SHADER_GEOMETRY
:
267 case PIPE_SHADER_FRAGMENT
:
268 variant
->u
.fs
.flatshade
= false;
269 variant
->u
.fs
.fb_height
= (info
->has_pos
) ?
270 vec
->fb
.state
.height
: 1;
271 variant
->u
.fs
.num_cbufs
= 1;
274 assert(!"unknown shader type");
278 variant
->use_pcb
= true;
280 variant
->num_sampler_views
= info
->num_samplers
;
281 for (i
= 0; i
< info
->num_samplers
; i
++) {
282 if (info
->shadow_samplers
& (1 << i
)) {
283 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
284 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_RED
;
285 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_RED
;
286 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ONE
;
289 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
290 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_GREEN
;
291 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_BLUE
;
292 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ALPHA
;
299 * Parse a TGSI instruction for the shader info.
302 ilo_shader_info_parse_inst(struct ilo_shader_info
*info
,
303 const struct tgsi_full_instruction
*inst
)
307 /* look for edgeflag passthrough */
308 if (info
->edgeflag_out
>= 0 &&
309 inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
310 inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
&&
311 inst
->Dst
[0].Register
.Index
== info
->edgeflag_out
) {
313 assert(inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
);
314 info
->edgeflag_in
= inst
->Src
[0].Register
.Index
;
317 if (inst
->Instruction
.Texture
) {
320 switch (inst
->Texture
.Texture
) {
321 case TGSI_TEXTURE_SHADOW1D
:
322 case TGSI_TEXTURE_SHADOW2D
:
323 case TGSI_TEXTURE_SHADOWRECT
:
324 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
325 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
326 case TGSI_TEXTURE_SHADOWCUBE
:
327 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
335 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
336 const struct tgsi_full_src_register
*src
= &inst
->Src
[i
];
338 if (src
->Register
.File
== TGSI_FILE_SAMPLER
) {
339 const int idx
= src
->Register
.Index
;
341 if (idx
>= info
->num_samplers
)
342 info
->num_samplers
= idx
+ 1;
345 info
->shadow_samplers
|= 1 << idx
;
352 * Parse a TGSI property for the shader info.
355 ilo_shader_info_parse_prop(struct ilo_shader_info
*info
,
356 const struct tgsi_full_property
*prop
)
358 switch (prop
->Property
.PropertyName
) {
359 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
:
360 info
->fs_color0_writes_all_cbufs
= prop
->u
[0].Data
;
368 * Parse a TGSI declaration for the shader info.
371 ilo_shader_info_parse_decl(struct ilo_shader_info
*info
,
372 const struct tgsi_full_declaration
*decl
)
374 switch (decl
->Declaration
.File
) {
375 case TGSI_FILE_INPUT
:
376 if (decl
->Declaration
.Interpolate
&&
377 decl
->Interp
.Interpolate
== TGSI_INTERPOLATE_COLOR
)
378 info
->has_color_interp
= true;
379 if (decl
->Declaration
.Semantic
&&
380 decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
)
381 info
->has_pos
= true;
383 case TGSI_FILE_OUTPUT
:
384 if (decl
->Declaration
.Semantic
&&
385 decl
->Semantic
.Name
== TGSI_SEMANTIC_EDGEFLAG
)
386 info
->edgeflag_out
= decl
->Range
.First
;
388 case TGSI_FILE_CONSTANT
:
390 const int idx
= (decl
->Declaration
.Dimension
) ?
391 decl
->Dim
.Index2D
: 0;
392 if (info
->constant_buffer_count
<= idx
)
393 info
->constant_buffer_count
= idx
+ 1;
396 case TGSI_FILE_SYSTEM_VALUE
:
397 if (decl
->Declaration
.Semantic
&&
398 decl
->Semantic
.Name
== TGSI_SEMANTIC_INSTANCEID
)
399 info
->has_instanceid
= true;
400 if (decl
->Declaration
.Semantic
&&
401 decl
->Semantic
.Name
== TGSI_SEMANTIC_VERTEXID
)
402 info
->has_vertexid
= true;
410 ilo_shader_info_parse_tokens(struct ilo_shader_info
*info
)
412 struct tgsi_parse_context parse
;
414 info
->edgeflag_in
= -1;
415 info
->edgeflag_out
= -1;
417 tgsi_parse_init(&parse
, info
->tokens
);
418 while (!tgsi_parse_end_of_tokens(&parse
)) {
419 const union tgsi_full_token
*token
;
421 tgsi_parse_token(&parse
);
422 token
= &parse
.FullToken
;
424 switch (token
->Token
.Type
) {
425 case TGSI_TOKEN_TYPE_DECLARATION
:
426 ilo_shader_info_parse_decl(info
, &token
->FullDeclaration
);
428 case TGSI_TOKEN_TYPE_INSTRUCTION
:
429 ilo_shader_info_parse_inst(info
, &token
->FullInstruction
);
431 case TGSI_TOKEN_TYPE_PROPERTY
:
432 ilo_shader_info_parse_prop(info
, &token
->FullProperty
);
438 tgsi_parse_free(&parse
);
442 * Create a shader state.
444 static struct ilo_shader_state
*
445 ilo_shader_state_create(const struct ilo_dev
*dev
,
446 const struct ilo_state_vector
*vec
,
447 int type
, const void *templ
)
449 struct ilo_shader_state
*state
;
450 struct ilo_shader_variant variant
;
452 state
= CALLOC_STRUCT(ilo_shader_state
);
456 state
->info
.dev
= dev
;
457 state
->info
.type
= type
;
459 if (type
== PIPE_SHADER_COMPUTE
) {
460 const struct pipe_compute_state
*c
=
461 (const struct pipe_compute_state
*) templ
;
463 state
->info
.tokens
= tgsi_dup_tokens(c
->prog
);
464 state
->info
.compute
.req_local_mem
= c
->req_local_mem
;
465 state
->info
.compute
.req_private_mem
= c
->req_private_mem
;
466 state
->info
.compute
.req_input_mem
= c
->req_input_mem
;
469 const struct pipe_shader_state
*s
=
470 (const struct pipe_shader_state
*) templ
;
472 state
->info
.tokens
= tgsi_dup_tokens(s
->tokens
);
473 state
->info
.stream_output
= s
->stream_output
;
476 list_inithead(&state
->variants
);
478 ilo_shader_info_parse_tokens(&state
->info
);
480 /* guess and compile now */
481 ilo_shader_variant_guess(&variant
, &state
->info
, vec
);
482 if (!ilo_shader_state_use_variant(state
, &variant
)) {
483 ilo_shader_destroy(state
);
491 * Add a compiled shader to the shader state.
494 ilo_shader_state_add_shader(struct ilo_shader_state
*state
,
495 struct ilo_shader
*sh
)
497 list_add(&sh
->list
, &state
->variants
);
498 state
->num_variants
++;
499 state
->total_size
+= sh
->kernel_size
;
502 ilo_shader_cache_notify_change(state
->cache
, state
);
506 * Remove a compiled shader from the shader state.
509 ilo_shader_state_remove_shader(struct ilo_shader_state
*state
,
510 struct ilo_shader
*sh
)
513 state
->num_variants
--;
514 state
->total_size
-= sh
->kernel_size
;
518 * Garbage collect shader variants in the shader state.
521 ilo_shader_state_gc(struct ilo_shader_state
*state
)
523 /* activate when the variants take up more than 4KiB of space */
524 const int limit
= 4 * 1024;
525 struct ilo_shader
*sh
, *next
;
527 if (state
->total_size
< limit
)
530 /* remove from the tail as the most recently ones are at the head */
531 LIST_FOR_EACH_ENTRY_SAFE_REV(sh
, next
, &state
->variants
, list
) {
532 ilo_shader_state_remove_shader(state
, sh
);
533 ilo_shader_destroy_kernel(sh
);
535 if (state
->total_size
<= limit
/ 2)
541 * Search for a shader variant.
543 static struct ilo_shader
*
544 ilo_shader_state_search_variant(struct ilo_shader_state
*state
,
545 const struct ilo_shader_variant
*variant
)
547 struct ilo_shader
*sh
= NULL
, *tmp
;
549 LIST_FOR_EACH_ENTRY(tmp
, &state
->variants
, list
) {
550 if (memcmp(&tmp
->variant
, variant
, sizeof(*variant
)) == 0) {
560 copy_so_info(struct ilo_shader
*sh
,
561 const struct pipe_stream_output_info
*so_info
)
565 if (!so_info
->num_outputs
)
568 sh
->so_info
= *so_info
;
570 for (i
= 0; i
< so_info
->num_outputs
; i
++) {
571 /* figure out which attribute is sourced */
572 for (attr
= 0; attr
< sh
->out
.count
; attr
++) {
573 const int reg_idx
= sh
->out
.register_indices
[attr
];
574 if (reg_idx
== so_info
->output
[i
].register_index
)
578 if (attr
< sh
->out
.count
) {
579 sh
->so_info
.output
[i
].register_index
= attr
;
582 assert(!"stream output an undefined register");
583 sh
->so_info
.output
[i
].register_index
= 0;
586 /* PSIZE is at W channel */
587 if (sh
->out
.semantic_names
[attr
] == TGSI_SEMANTIC_PSIZE
) {
588 assert(so_info
->output
[i
].start_component
== 0);
589 assert(so_info
->output
[i
].num_components
== 1);
590 sh
->so_info
.output
[i
].start_component
= 3;
596 * Add a shader variant to the shader state.
598 static struct ilo_shader
*
599 ilo_shader_state_add_variant(struct ilo_shader_state
*state
,
600 const struct ilo_shader_variant
*variant
)
602 struct ilo_shader
*sh
;
604 switch (state
->info
.type
) {
605 case PIPE_SHADER_VERTEX
:
606 sh
= ilo_shader_compile_vs(state
, variant
);
608 case PIPE_SHADER_FRAGMENT
:
609 sh
= ilo_shader_compile_fs(state
, variant
);
611 case PIPE_SHADER_GEOMETRY
:
612 sh
= ilo_shader_compile_gs(state
, variant
);
614 case PIPE_SHADER_COMPUTE
:
615 sh
= ilo_shader_compile_cs(state
, variant
);
622 assert(!"failed to compile shader");
626 sh
->variant
= *variant
;
628 copy_so_info(sh
, &state
->info
.stream_output
);
630 ilo_shader_state_add_shader(state
, sh
);
636 * Update state->shader to point to a variant. If the variant does not exist,
637 * it will be added first.
640 ilo_shader_state_use_variant(struct ilo_shader_state
*state
,
641 const struct ilo_shader_variant
*variant
)
643 struct ilo_shader
*sh
;
644 bool construct_cso
= false;
646 sh
= ilo_shader_state_search_variant(state
, variant
);
648 ilo_shader_state_gc(state
);
650 sh
= ilo_shader_state_add_variant(state
, variant
);
654 construct_cso
= true;
658 if (state
->variants
.next
!= &sh
->list
) {
660 list_add(&sh
->list
, &state
->variants
);
666 switch (state
->info
.type
) {
667 case PIPE_SHADER_VERTEX
:
668 ilo_gpe_init_vs_cso(state
->info
.dev
, state
, &sh
->cso
);
670 case PIPE_SHADER_GEOMETRY
:
671 ilo_gpe_init_gs_cso(state
->info
.dev
, state
, &sh
->cso
);
673 case PIPE_SHADER_FRAGMENT
:
674 ilo_gpe_init_fs_cso(state
->info
.dev
, state
, &sh
->cso
);
684 struct ilo_shader_state
*
685 ilo_shader_create_vs(const struct ilo_dev
*dev
,
686 const struct pipe_shader_state
*state
,
687 const struct ilo_state_vector
*precompile
)
689 struct ilo_shader_state
*shader
;
691 shader
= ilo_shader_state_create(dev
, precompile
,
692 PIPE_SHADER_VERTEX
, state
);
694 /* states used in ilo_shader_variant_init() */
695 shader
->info
.non_orthogonal_states
= ILO_DIRTY_VIEW_VS
|
696 ILO_DIRTY_RASTERIZER
|
702 struct ilo_shader_state
*
703 ilo_shader_create_gs(const struct ilo_dev
*dev
,
704 const struct pipe_shader_state
*state
,
705 const struct ilo_state_vector
*precompile
)
707 struct ilo_shader_state
*shader
;
709 shader
= ilo_shader_state_create(dev
, precompile
,
710 PIPE_SHADER_GEOMETRY
, state
);
712 /* states used in ilo_shader_variant_init() */
713 shader
->info
.non_orthogonal_states
= ILO_DIRTY_VIEW_GS
|
715 ILO_DIRTY_RASTERIZER
|
721 struct ilo_shader_state
*
722 ilo_shader_create_fs(const struct ilo_dev
*dev
,
723 const struct pipe_shader_state
*state
,
724 const struct ilo_state_vector
*precompile
)
726 struct ilo_shader_state
*shader
;
728 shader
= ilo_shader_state_create(dev
, precompile
,
729 PIPE_SHADER_FRAGMENT
, state
);
731 /* states used in ilo_shader_variant_init() */
732 shader
->info
.non_orthogonal_states
= ILO_DIRTY_VIEW_FS
|
733 ILO_DIRTY_RASTERIZER
|
740 struct ilo_shader_state
*
741 ilo_shader_create_cs(const struct ilo_dev
*dev
,
742 const struct pipe_compute_state
*state
,
743 const struct ilo_state_vector
*precompile
)
745 struct ilo_shader_state
*shader
;
747 shader
= ilo_shader_state_create(dev
, precompile
,
748 PIPE_SHADER_COMPUTE
, state
);
750 shader
->info
.non_orthogonal_states
= 0;
756 * Destroy a shader state.
759 ilo_shader_destroy(struct ilo_shader_state
*shader
)
761 struct ilo_shader
*sh
, *next
;
763 LIST_FOR_EACH_ENTRY_SAFE(sh
, next
, &shader
->variants
, list
)
764 ilo_shader_destroy_kernel(sh
);
766 FREE((struct tgsi_token
*) shader
->info
.tokens
);
771 * Return the type (PIPE_SHADER_x) of the shader.
774 ilo_shader_get_type(const struct ilo_shader_state
*shader
)
776 return shader
->info
.type
;
780 * Select a kernel for the given context. This will compile a new kernel if
781 * none of the existing kernels work with the context.
783 * \param ilo the context
784 * \param dirty states of the context that are considered changed
785 * \return true if a different kernel is selected
788 ilo_shader_select_kernel(struct ilo_shader_state
*shader
,
789 const struct ilo_state_vector
*vec
,
792 const struct ilo_shader
* const cur
= shader
->shader
;
793 struct ilo_shader_variant variant
;
795 if (!(shader
->info
.non_orthogonal_states
& dirty
))
798 ilo_shader_variant_init(&variant
, &shader
->info
, vec
);
799 ilo_shader_state_use_variant(shader
, &variant
);
801 return (shader
->shader
!= cur
);
805 route_attr(const int *semantics
, const int *indices
, int len
,
806 int semantic
, int index
)
810 for (i
= 0; i
< len
; i
++) {
811 if (semantics
[i
] == semantic
&& indices
[i
] == index
)
815 /* failed to match for COLOR, try BCOLOR */
816 if (semantic
== TGSI_SEMANTIC_COLOR
) {
817 for (i
= 0; i
< len
; i
++) {
818 if (semantics
[i
] == TGSI_SEMANTIC_BCOLOR
&& indices
[i
] == index
)
827 * Select a routing for the given source shader and rasterizer state.
829 * \return true if a different routing is selected
832 ilo_shader_select_kernel_routing(struct ilo_shader_state
*shader
,
833 const struct ilo_shader_state
*source
,
834 const struct ilo_rasterizer_state
*rasterizer
)
836 const uint32_t sprite_coord_enable
= rasterizer
->state
.sprite_coord_enable
;
837 const bool light_twoside
= rasterizer
->state
.light_twoside
;
838 struct ilo_shader
*kernel
= shader
->shader
;
839 struct ilo_kernel_routing
*routing
= &kernel
->routing
;
840 const int *src_semantics
, *src_indices
;
841 int src_len
, max_src_slot
;
842 int dst_len
, dst_slot
;
844 /* we are constructing 3DSTATE_SBE here */
845 ILO_DEV_ASSERT(shader
->info
.dev
, 6, 8);
850 assert(source
->shader
);
851 src_semantics
= source
->shader
->out
.semantic_names
;
852 src_indices
= source
->shader
->out
.semantic_indices
;
853 src_len
= source
->shader
->out
.count
;
856 src_semantics
= kernel
->in
.semantic_names
;
857 src_indices
= kernel
->in
.semantic_indices
;
858 src_len
= kernel
->in
.count
;
862 if (kernel
->routing_initialized
&&
863 routing
->source_skip
+ routing
->source_len
<= src_len
&&
864 kernel
->routing_sprite_coord_enable
== sprite_coord_enable
&&
865 !memcmp(kernel
->routing_src_semantics
,
866 &src_semantics
[routing
->source_skip
],
867 sizeof(kernel
->routing_src_semantics
[0]) * routing
->source_len
) &&
868 !memcmp(kernel
->routing_src_indices
,
869 &src_indices
[routing
->source_skip
],
870 sizeof(kernel
->routing_src_indices
[0]) * routing
->source_len
))
874 /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
875 assert(src_semantics
[0] == TGSI_SEMANTIC_PSIZE
);
876 assert(src_semantics
[1] == TGSI_SEMANTIC_POSITION
);
877 routing
->source_skip
= 2;
879 routing
->source_len
= src_len
- routing
->source_skip
;
880 src_semantics
+= routing
->source_skip
;
881 src_indices
+= routing
->source_skip
;
884 routing
->source_skip
= 0;
885 routing
->source_len
= src_len
;
888 routing
->const_interp_enable
= kernel
->in
.const_interp_enable
;
889 routing
->point_sprite_enable
= 0;
890 routing
->swizzle_enable
= false;
892 assert(kernel
->in
.count
<= Elements(routing
->swizzles
));
893 dst_len
= MIN2(kernel
->in
.count
, Elements(routing
->swizzles
));
896 for (dst_slot
= 0; dst_slot
< dst_len
; dst_slot
++) {
897 const int semantic
= kernel
->in
.semantic_names
[dst_slot
];
898 const int index
= kernel
->in
.semantic_indices
[dst_slot
];
901 if (semantic
== TGSI_SEMANTIC_GENERIC
&&
902 (sprite_coord_enable
& (1 << index
)))
903 routing
->point_sprite_enable
|= 1 << dst_slot
;
906 src_slot
= route_attr(src_semantics
, src_indices
,
907 routing
->source_len
, semantic
, index
);
910 * The source shader stage does not output this attribute. The value
911 * is supposed to be undefined, unless the attribute goes through
912 * point sprite replacement or the attribute is
913 * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
914 * attribute is picked.
916 * We should update the kernel code and omit the output of
917 * TGSI_SEMANTIC_POSITION here.
926 routing
->swizzles
[dst_slot
] = src_slot
;
928 /* use the following slot for two-sided lighting */
929 if (semantic
== TGSI_SEMANTIC_COLOR
&& light_twoside
&&
930 src_slot
+ 1 < routing
->source_len
&&
931 src_semantics
[src_slot
+ 1] == TGSI_SEMANTIC_BCOLOR
&&
932 src_indices
[src_slot
+ 1] == index
) {
933 routing
->swizzles
[dst_slot
] |= GEN8_SBE_SWIZ_INPUTATTR_FACING
;
937 if (routing
->swizzles
[dst_slot
] != dst_slot
)
938 routing
->swizzle_enable
= true;
940 if (max_src_slot
< src_slot
)
941 max_src_slot
= src_slot
;
944 memset(&routing
->swizzles
[dst_slot
], 0, sizeof(routing
->swizzles
) -
945 sizeof(routing
->swizzles
[0]) * dst_slot
);
948 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
950 * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
951 * 0 indicating no Vertex URB data to be read.
953 * This field should be set to the minimum length required to read the
954 * maximum source attribute. The maximum source attribute is indicated
955 * by the maximum value of the enabled Attribute # Source Attribute if
956 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
959 * read_length = ceiling((max_source_attr+1)/2)
961 * [errata] Corruption/Hang possible if length programmed larger than
964 routing
->source_len
= max_src_slot
+ 1;
966 /* remember the states of the source */
967 kernel
->routing_initialized
= true;
968 kernel
->routing_sprite_coord_enable
= sprite_coord_enable
;
969 memcpy(kernel
->routing_src_semantics
, src_semantics
,
970 sizeof(kernel
->routing_src_semantics
[0]) * routing
->source_len
);
971 memcpy(kernel
->routing_src_indices
, src_indices
,
972 sizeof(kernel
->routing_src_indices
[0]) * routing
->source_len
);
978 * Return the cache offset of the selected kernel. This must be called after
979 * ilo_shader_select_kernel() and ilo_shader_cache_upload().
982 ilo_shader_get_kernel_offset(const struct ilo_shader_state
*shader
)
984 const struct ilo_shader
*kernel
= shader
->shader
;
986 assert(kernel
&& kernel
->uploaded
);
988 return kernel
->cache_offset
;
992 * Query a kernel parameter for the selected kernel.
995 ilo_shader_get_kernel_param(const struct ilo_shader_state
*shader
,
996 enum ilo_kernel_param param
)
998 const struct ilo_shader
*kernel
= shader
->shader
;
1004 case ILO_KERNEL_INPUT_COUNT
:
1005 val
= kernel
->in
.count
;
1007 case ILO_KERNEL_OUTPUT_COUNT
:
1008 val
= kernel
->out
.count
;
1010 case ILO_KERNEL_SAMPLER_COUNT
:
1011 val
= shader
->info
.num_samplers
;
1013 case ILO_KERNEL_URB_DATA_START_REG
:
1014 val
= kernel
->in
.start_grf
;
1016 case ILO_KERNEL_SKIP_CBUF0_UPLOAD
:
1017 val
= kernel
->skip_cbuf0_upload
;
1019 case ILO_KERNEL_PCB_CBUF0_SIZE
:
1020 val
= kernel
->pcb
.cbuf0_size
;
1023 case ILO_KERNEL_SURFACE_TOTAL_COUNT
:
1024 val
= kernel
->bt
.total_count
;
1026 case ILO_KERNEL_SURFACE_TEX_BASE
:
1027 val
= kernel
->bt
.tex_base
;
1029 case ILO_KERNEL_SURFACE_TEX_COUNT
:
1030 val
= kernel
->bt
.tex_count
;
1032 case ILO_KERNEL_SURFACE_CONST_BASE
:
1033 val
= kernel
->bt
.const_base
;
1035 case ILO_KERNEL_SURFACE_CONST_COUNT
:
1036 val
= kernel
->bt
.const_count
;
1038 case ILO_KERNEL_SURFACE_RES_BASE
:
1039 val
= kernel
->bt
.res_base
;
1041 case ILO_KERNEL_SURFACE_RES_COUNT
:
1042 val
= kernel
->bt
.res_count
;
1045 case ILO_KERNEL_VS_INPUT_INSTANCEID
:
1046 val
= shader
->info
.has_instanceid
;
1048 case ILO_KERNEL_VS_INPUT_VERTEXID
:
1049 val
= shader
->info
.has_vertexid
;
1051 case ILO_KERNEL_VS_INPUT_EDGEFLAG
:
1052 if (shader
->info
.edgeflag_in
>= 0) {
1053 /* we rely on the state tracker here */
1054 assert(shader
->info
.edgeflag_in
== kernel
->in
.count
- 1);
1061 case ILO_KERNEL_VS_PCB_UCP_SIZE
:
1062 val
= kernel
->pcb
.clip_state_size
;
1064 case ILO_KERNEL_VS_GEN6_SO
:
1065 val
= kernel
->stream_output
;
1067 case ILO_KERNEL_VS_GEN6_SO_START_REG
:
1068 val
= kernel
->gs_start_grf
;
1070 case ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET
:
1071 val
= kernel
->gs_offsets
[0];
1073 case ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET
:
1074 val
= kernel
->gs_offsets
[1];
1076 case ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET
:
1077 val
= kernel
->gs_offsets
[2];
1079 case ILO_KERNEL_VS_GEN6_SO_SURFACE_COUNT
:
1080 val
= kernel
->gs_bt_so_count
;
1083 case ILO_KERNEL_GS_DISCARD_ADJACENCY
:
1084 val
= kernel
->in
.discard_adj
;
1086 case ILO_KERNEL_GS_GEN6_SVBI_POST_INC
:
1087 val
= kernel
->svbi_post_inc
;
1089 case ILO_KERNEL_GS_GEN6_SURFACE_SO_BASE
:
1090 val
= kernel
->bt
.gen6_so_base
;
1092 case ILO_KERNEL_GS_GEN6_SURFACE_SO_COUNT
:
1093 val
= kernel
->bt
.gen6_so_count
;
1096 case ILO_KERNEL_FS_INPUT_Z
:
1097 case ILO_KERNEL_FS_INPUT_W
:
1098 val
= kernel
->in
.has_pos
;
1100 case ILO_KERNEL_FS_OUTPUT_Z
:
1101 val
= kernel
->out
.has_pos
;
1103 case ILO_KERNEL_FS_USE_KILL
:
1104 val
= kernel
->has_kill
;
1106 case ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
:
1107 val
= kernel
->in
.barycentric_interpolation_mode
;
1109 case ILO_KERNEL_FS_DISPATCH_16_OFFSET
:
1112 case ILO_KERNEL_FS_SURFACE_RT_BASE
:
1113 val
= kernel
->bt
.rt_base
;
1115 case ILO_KERNEL_FS_SURFACE_RT_COUNT
:
1116 val
= kernel
->bt
.rt_count
;
1119 case ILO_KERNEL_CS_LOCAL_SIZE
:
1120 val
= shader
->info
.compute
.req_local_mem
;
1122 case ILO_KERNEL_CS_PRIVATE_SIZE
:
1123 val
= shader
->info
.compute
.req_private_mem
;
1125 case ILO_KERNEL_CS_INPUT_SIZE
:
1126 val
= shader
->info
.compute
.req_input_mem
;
1128 case ILO_KERNEL_CS_SIMD_SIZE
:
1131 case ILO_KERNEL_CS_SURFACE_GLOBAL_BASE
:
1132 val
= kernel
->bt
.global_base
;
1134 case ILO_KERNEL_CS_SURFACE_GLOBAL_COUNT
:
1135 val
= kernel
->bt
.global_count
;
1139 assert(!"unknown kernel parameter");
1148 * Return the CSO of the selected kernel.
1150 const struct ilo_shader_cso
*
1151 ilo_shader_get_kernel_cso(const struct ilo_shader_state
*shader
)
1153 const struct ilo_shader
*kernel
= shader
->shader
;
1157 return &kernel
->cso
;
1161 * Return the SO info of the selected kernel.
1163 const struct pipe_stream_output_info
*
1164 ilo_shader_get_kernel_so_info(const struct ilo_shader_state
*shader
)
1166 const struct ilo_shader
*kernel
= shader
->shader
;
1170 return &kernel
->so_info
;
1174 * Return the routing info of the selected kernel.
1176 const struct ilo_kernel_routing
*
1177 ilo_shader_get_kernel_routing(const struct ilo_shader_state
*shader
)
1179 const struct ilo_shader
*kernel
= shader
->shader
;
1183 return &kernel
->routing
;