/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include "genhw/genhw.h" /* for SBE setup */
#include "tgsi/tgsi_parse.h"
#include "intel_winsys.h"

#include "shader/ilo_shader_internal.h"
#include "ilo_builder.h"
#include "ilo_state.h"
#include "ilo_shader.h"

37 struct ilo_shader_cache
{
38 struct list_head shaders
;
39 struct list_head changed
;
43 * Create a shader cache. A shader cache can manage shaders and upload them
46 struct ilo_shader_cache
*
47 ilo_shader_cache_create(void)
49 struct ilo_shader_cache
*shc
;
51 shc
= CALLOC_STRUCT(ilo_shader_cache
);
55 list_inithead(&shc
->shaders
);
56 list_inithead(&shc
->changed
);
/**
 * Destroy a shader cache.
 *
 * NOTE(review): the body was lost in extraction; releasing the cache with
 * FREE() is the counterpart of the CALLOC_STRUCT() in
 * ilo_shader_cache_create() -- confirm against upstream.
 */
void
ilo_shader_cache_destroy(struct ilo_shader_cache *shc)
{
   FREE(shc);
}

71 * Add a shader to the cache.
74 ilo_shader_cache_add(struct ilo_shader_cache
*shc
,
75 struct ilo_shader_state
*shader
)
77 struct ilo_shader
*sh
;
80 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
)
83 list_add(&shader
->list
, &shc
->changed
);
87 * Remove a shader from the cache.
90 ilo_shader_cache_remove(struct ilo_shader_cache
*shc
,
91 struct ilo_shader_state
*shader
)
93 list_del(&shader
->list
);
98 * Notify the cache that a managed shader has changed.
101 ilo_shader_cache_notify_change(struct ilo_shader_cache
*shc
,
102 struct ilo_shader_state
*shader
)
104 if (shader
->cache
== shc
) {
105 list_del(&shader
->list
);
106 list_add(&shader
->list
, &shc
->changed
);
111 * Upload managed shaders to the bo. Only shaders that are changed or added
112 * after the last upload are uploaded.
115 ilo_shader_cache_upload(struct ilo_shader_cache
*shc
,
116 struct ilo_builder
*builder
)
118 struct ilo_shader_state
*shader
, *next
;
120 LIST_FOR_EACH_ENTRY_SAFE(shader
, next
, &shc
->changed
, list
) {
121 struct ilo_shader
*sh
;
123 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
) {
127 sh
->cache_offset
= ilo_builder_instruction_write(builder
,
128 sh
->kernel_size
, sh
->kernel
);
133 list_del(&shader
->list
);
134 list_add(&shader
->list
, &shc
->shaders
);
139 * Invalidate all shaders so that they get uploaded in next
140 * ilo_shader_cache_upload().
143 ilo_shader_cache_invalidate(struct ilo_shader_cache
*shc
)
145 struct ilo_shader_state
*shader
, *next
;
147 LIST_FOR_EACH_ENTRY_SAFE(shader
, next
, &shc
->shaders
, list
) {
148 list_del(&shader
->list
);
149 list_add(&shader
->list
, &shc
->changed
);
152 LIST_FOR_EACH_ENTRY(shader
, &shc
->changed
, list
) {
153 struct ilo_shader
*sh
;
155 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
)
156 sh
->uploaded
= false;
161 * Initialize a shader variant.
164 ilo_shader_variant_init(struct ilo_shader_variant
*variant
,
165 const struct ilo_shader_info
*info
,
166 const struct ilo_context
*ilo
)
170 memset(variant
, 0, sizeof(*variant
));
172 switch (info
->type
) {
173 case PIPE_SHADER_VERTEX
:
174 variant
->u
.vs
.rasterizer_discard
=
175 ilo
->rasterizer
->state
.rasterizer_discard
;
176 variant
->u
.vs
.num_ucps
=
177 util_last_bit(ilo
->rasterizer
->state
.clip_plane_enable
);
179 case PIPE_SHADER_GEOMETRY
:
180 variant
->u
.gs
.rasterizer_discard
=
181 ilo
->rasterizer
->state
.rasterizer_discard
;
182 variant
->u
.gs
.num_inputs
= ilo
->vs
->shader
->out
.count
;
183 for (i
= 0; i
< ilo
->vs
->shader
->out
.count
; i
++) {
184 variant
->u
.gs
.semantic_names
[i
] =
185 ilo
->vs
->shader
->out
.semantic_names
[i
];
186 variant
->u
.gs
.semantic_indices
[i
] =
187 ilo
->vs
->shader
->out
.semantic_indices
[i
];
190 case PIPE_SHADER_FRAGMENT
:
191 variant
->u
.fs
.flatshade
=
192 (info
->has_color_interp
&& ilo
->rasterizer
->state
.flatshade
);
193 variant
->u
.fs
.fb_height
= (info
->has_pos
) ?
194 ilo
->fb
.state
.height
: 1;
195 variant
->u
.fs
.num_cbufs
= ilo
->fb
.state
.nr_cbufs
;
198 assert(!"unknown shader type");
202 /* use PCB unless constant buffer 0 is not in user buffer */
203 if ((ilo
->cbuf
[info
->type
].enabled_mask
& 0x1) &&
204 !ilo
->cbuf
[info
->type
].cso
[0].user_buffer
)
205 variant
->use_pcb
= false;
207 variant
->use_pcb
= true;
209 num_views
= ilo
->view
[info
->type
].count
;
210 assert(info
->num_samplers
<= num_views
);
212 variant
->num_sampler_views
= info
->num_samplers
;
213 for (i
= 0; i
< info
->num_samplers
; i
++) {
214 const struct pipe_sampler_view
*view
= ilo
->view
[info
->type
].states
[i
];
215 const struct ilo_sampler_cso
*sampler
= ilo
->sampler
[info
->type
].cso
[i
];
218 variant
->sampler_view_swizzles
[i
].r
= view
->swizzle_r
;
219 variant
->sampler_view_swizzles
[i
].g
= view
->swizzle_g
;
220 variant
->sampler_view_swizzles
[i
].b
= view
->swizzle_b
;
221 variant
->sampler_view_swizzles
[i
].a
= view
->swizzle_a
;
223 else if (info
->shadow_samplers
& (1 << i
)) {
224 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
225 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_RED
;
226 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_RED
;
227 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ONE
;
230 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
231 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_GREEN
;
232 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_BLUE
;
233 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ALPHA
;
237 * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
238 * the HW wrap mode is set to GEN6_TEXCOORDMODE_CLAMP_BORDER, and we
239 * need to manually saturate the texture coordinates.
242 variant
->saturate_tex_coords
[0] |= sampler
->saturate_s
<< i
;
243 variant
->saturate_tex_coords
[1] |= sampler
->saturate_t
<< i
;
244 variant
->saturate_tex_coords
[2] |= sampler
->saturate_r
<< i
;
250 * Guess the shader variant, knowing that the context may still change.
253 ilo_shader_variant_guess(struct ilo_shader_variant
*variant
,
254 const struct ilo_shader_info
*info
,
255 const struct ilo_context
*ilo
)
259 memset(variant
, 0, sizeof(*variant
));
261 switch (info
->type
) {
262 case PIPE_SHADER_VERTEX
:
264 case PIPE_SHADER_GEOMETRY
:
266 case PIPE_SHADER_FRAGMENT
:
267 variant
->u
.fs
.flatshade
= false;
268 variant
->u
.fs
.fb_height
= (info
->has_pos
) ?
269 ilo
->fb
.state
.height
: 1;
270 variant
->u
.fs
.num_cbufs
= 1;
273 assert(!"unknown shader type");
277 variant
->use_pcb
= true;
279 variant
->num_sampler_views
= info
->num_samplers
;
280 for (i
= 0; i
< info
->num_samplers
; i
++) {
281 if (info
->shadow_samplers
& (1 << i
)) {
282 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
283 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_RED
;
284 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_RED
;
285 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ONE
;
288 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
289 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_GREEN
;
290 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_BLUE
;
291 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ALPHA
;
298 * Parse a TGSI instruction for the shader info.
301 ilo_shader_info_parse_inst(struct ilo_shader_info
*info
,
302 const struct tgsi_full_instruction
*inst
)
306 /* look for edgeflag passthrough */
307 if (info
->edgeflag_out
>= 0 &&
308 inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
309 inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
&&
310 inst
->Dst
[0].Register
.Index
== info
->edgeflag_out
) {
312 assert(inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
);
313 info
->edgeflag_in
= inst
->Src
[0].Register
.Index
;
316 if (inst
->Instruction
.Texture
) {
319 switch (inst
->Texture
.Texture
) {
320 case TGSI_TEXTURE_SHADOW1D
:
321 case TGSI_TEXTURE_SHADOW2D
:
322 case TGSI_TEXTURE_SHADOWRECT
:
323 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
324 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
325 case TGSI_TEXTURE_SHADOWCUBE
:
326 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
334 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
335 const struct tgsi_full_src_register
*src
= &inst
->Src
[i
];
337 if (src
->Register
.File
== TGSI_FILE_SAMPLER
) {
338 const int idx
= src
->Register
.Index
;
340 if (idx
>= info
->num_samplers
)
341 info
->num_samplers
= idx
+ 1;
344 info
->shadow_samplers
|= 1 << idx
;
351 * Parse a TGSI property for the shader info.
354 ilo_shader_info_parse_prop(struct ilo_shader_info
*info
,
355 const struct tgsi_full_property
*prop
)
357 switch (prop
->Property
.PropertyName
) {
358 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
:
359 info
->fs_color0_writes_all_cbufs
= prop
->u
[0].Data
;
367 * Parse a TGSI declaration for the shader info.
370 ilo_shader_info_parse_decl(struct ilo_shader_info
*info
,
371 const struct tgsi_full_declaration
*decl
)
373 switch (decl
->Declaration
.File
) {
374 case TGSI_FILE_INPUT
:
375 if (decl
->Declaration
.Interpolate
&&
376 decl
->Interp
.Interpolate
== TGSI_INTERPOLATE_COLOR
)
377 info
->has_color_interp
= true;
378 if (decl
->Declaration
.Semantic
&&
379 decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
)
380 info
->has_pos
= true;
382 case TGSI_FILE_OUTPUT
:
383 if (decl
->Declaration
.Semantic
&&
384 decl
->Semantic
.Name
== TGSI_SEMANTIC_EDGEFLAG
)
385 info
->edgeflag_out
= decl
->Range
.First
;
387 case TGSI_FILE_SYSTEM_VALUE
:
388 if (decl
->Declaration
.Semantic
&&
389 decl
->Semantic
.Name
== TGSI_SEMANTIC_INSTANCEID
)
390 info
->has_instanceid
= true;
391 if (decl
->Declaration
.Semantic
&&
392 decl
->Semantic
.Name
== TGSI_SEMANTIC_VERTEXID
)
393 info
->has_vertexid
= true;
401 ilo_shader_info_parse_tokens(struct ilo_shader_info
*info
)
403 struct tgsi_parse_context parse
;
405 info
->edgeflag_in
= -1;
406 info
->edgeflag_out
= -1;
408 tgsi_parse_init(&parse
, info
->tokens
);
409 while (!tgsi_parse_end_of_tokens(&parse
)) {
410 const union tgsi_full_token
*token
;
412 tgsi_parse_token(&parse
);
413 token
= &parse
.FullToken
;
415 switch (token
->Token
.Type
) {
416 case TGSI_TOKEN_TYPE_DECLARATION
:
417 ilo_shader_info_parse_decl(info
, &token
->FullDeclaration
);
419 case TGSI_TOKEN_TYPE_INSTRUCTION
:
420 ilo_shader_info_parse_inst(info
, &token
->FullInstruction
);
422 case TGSI_TOKEN_TYPE_PROPERTY
:
423 ilo_shader_info_parse_prop(info
, &token
->FullProperty
);
429 tgsi_parse_free(&parse
);
433 * Create a shader state.
435 static struct ilo_shader_state
*
436 ilo_shader_state_create(const struct ilo_context
*ilo
,
437 int type
, const void *templ
)
439 struct ilo_shader_state
*state
;
440 struct ilo_shader_variant variant
;
442 state
= CALLOC_STRUCT(ilo_shader_state
);
446 state
->info
.dev
= ilo
->dev
;
447 state
->info
.type
= type
;
449 if (type
== PIPE_SHADER_COMPUTE
) {
450 const struct pipe_compute_state
*c
=
451 (const struct pipe_compute_state
*) templ
;
453 state
->info
.tokens
= tgsi_dup_tokens(c
->prog
);
454 state
->info
.compute
.req_local_mem
= c
->req_local_mem
;
455 state
->info
.compute
.req_private_mem
= c
->req_private_mem
;
456 state
->info
.compute
.req_input_mem
= c
->req_input_mem
;
459 const struct pipe_shader_state
*s
=
460 (const struct pipe_shader_state
*) templ
;
462 state
->info
.tokens
= tgsi_dup_tokens(s
->tokens
);
463 state
->info
.stream_output
= s
->stream_output
;
466 list_inithead(&state
->variants
);
468 ilo_shader_info_parse_tokens(&state
->info
);
470 /* guess and compile now */
471 ilo_shader_variant_guess(&variant
, &state
->info
, ilo
);
472 if (!ilo_shader_state_use_variant(state
, &variant
)) {
473 ilo_shader_destroy(state
);
481 * Add a compiled shader to the shader state.
484 ilo_shader_state_add_shader(struct ilo_shader_state
*state
,
485 struct ilo_shader
*sh
)
487 list_add(&sh
->list
, &state
->variants
);
488 state
->num_variants
++;
489 state
->total_size
+= sh
->kernel_size
;
492 ilo_shader_cache_notify_change(state
->cache
, state
);
496 * Remove a compiled shader from the shader state.
499 ilo_shader_state_remove_shader(struct ilo_shader_state
*state
,
500 struct ilo_shader
*sh
)
503 state
->num_variants
--;
504 state
->total_size
-= sh
->kernel_size
;
508 * Garbage collect shader variants in the shader state.
511 ilo_shader_state_gc(struct ilo_shader_state
*state
)
513 /* activate when the variants take up more than 4KiB of space */
514 const int limit
= 4 * 1024;
515 struct ilo_shader
*sh
, *next
;
517 if (state
->total_size
< limit
)
520 /* remove from the tail as the most recently ones are at the head */
521 LIST_FOR_EACH_ENTRY_SAFE_REV(sh
, next
, &state
->variants
, list
) {
522 ilo_shader_state_remove_shader(state
, sh
);
523 ilo_shader_destroy_kernel(sh
);
525 if (state
->total_size
<= limit
/ 2)
531 * Search for a shader variant.
533 static struct ilo_shader
*
534 ilo_shader_state_search_variant(struct ilo_shader_state
*state
,
535 const struct ilo_shader_variant
*variant
)
537 struct ilo_shader
*sh
= NULL
, *tmp
;
539 LIST_FOR_EACH_ENTRY(tmp
, &state
->variants
, list
) {
540 if (memcmp(&tmp
->variant
, variant
, sizeof(*variant
)) == 0) {
550 copy_so_info(struct ilo_shader
*sh
,
551 const struct pipe_stream_output_info
*so_info
)
555 if (!so_info
->num_outputs
)
558 sh
->so_info
= *so_info
;
560 for (i
= 0; i
< so_info
->num_outputs
; i
++) {
561 /* figure out which attribute is sourced */
562 for (attr
= 0; attr
< sh
->out
.count
; attr
++) {
563 const int reg_idx
= sh
->out
.register_indices
[attr
];
564 if (reg_idx
== so_info
->output
[i
].register_index
)
568 if (attr
< sh
->out
.count
) {
569 sh
->so_info
.output
[i
].register_index
= attr
;
572 assert(!"stream output an undefined register");
573 sh
->so_info
.output
[i
].register_index
= 0;
576 /* PSIZE is at W channel */
577 if (sh
->out
.semantic_names
[attr
] == TGSI_SEMANTIC_PSIZE
) {
578 assert(so_info
->output
[i
].start_component
== 0);
579 assert(so_info
->output
[i
].num_components
== 1);
580 sh
->so_info
.output
[i
].start_component
= 3;
586 * Add a shader variant to the shader state.
588 static struct ilo_shader
*
589 ilo_shader_state_add_variant(struct ilo_shader_state
*state
,
590 const struct ilo_shader_variant
*variant
)
592 struct ilo_shader
*sh
;
594 switch (state
->info
.type
) {
595 case PIPE_SHADER_VERTEX
:
596 sh
= ilo_shader_compile_vs(state
, variant
);
598 case PIPE_SHADER_FRAGMENT
:
599 sh
= ilo_shader_compile_fs(state
, variant
);
601 case PIPE_SHADER_GEOMETRY
:
602 sh
= ilo_shader_compile_gs(state
, variant
);
604 case PIPE_SHADER_COMPUTE
:
605 sh
= ilo_shader_compile_cs(state
, variant
);
612 assert(!"failed to compile shader");
616 sh
->variant
= *variant
;
618 copy_so_info(sh
, &state
->info
.stream_output
);
620 ilo_shader_state_add_shader(state
, sh
);
626 * Update state->shader to point to a variant. If the variant does not exist,
627 * it will be added first.
630 ilo_shader_state_use_variant(struct ilo_shader_state
*state
,
631 const struct ilo_shader_variant
*variant
)
633 struct ilo_shader
*sh
;
634 bool construct_cso
= false;
636 sh
= ilo_shader_state_search_variant(state
, variant
);
638 ilo_shader_state_gc(state
);
640 sh
= ilo_shader_state_add_variant(state
, variant
);
644 construct_cso
= true;
648 if (state
->variants
.next
!= &sh
->list
) {
650 list_add(&sh
->list
, &state
->variants
);
656 switch (state
->info
.type
) {
657 case PIPE_SHADER_VERTEX
:
658 ilo_gpe_init_vs_cso(state
->info
.dev
, state
, &sh
->cso
);
660 case PIPE_SHADER_GEOMETRY
:
661 ilo_gpe_init_gs_cso(state
->info
.dev
, state
, &sh
->cso
);
663 case PIPE_SHADER_FRAGMENT
:
664 ilo_gpe_init_fs_cso(state
->info
.dev
, state
, &sh
->cso
);
674 struct ilo_shader_state
*
675 ilo_shader_create_vs(const struct ilo_dev_info
*dev
,
676 const struct pipe_shader_state
*state
,
677 const struct ilo_context
*precompile
)
679 struct ilo_shader_state
*shader
;
681 shader
= ilo_shader_state_create(precompile
, PIPE_SHADER_VERTEX
, state
);
683 /* states used in ilo_shader_variant_init() */
684 shader
->info
.non_orthogonal_states
= ILO_DIRTY_VIEW_VS
|
685 ILO_DIRTY_RASTERIZER
|
691 struct ilo_shader_state
*
692 ilo_shader_create_gs(const struct ilo_dev_info
*dev
,
693 const struct pipe_shader_state
*state
,
694 const struct ilo_context
*precompile
)
696 struct ilo_shader_state
*shader
;
698 shader
= ilo_shader_state_create(precompile
, PIPE_SHADER_GEOMETRY
, state
);
700 /* states used in ilo_shader_variant_init() */
701 shader
->info
.non_orthogonal_states
= ILO_DIRTY_VIEW_GS
|
703 ILO_DIRTY_RASTERIZER
|
709 struct ilo_shader_state
*
710 ilo_shader_create_fs(const struct ilo_dev_info
*dev
,
711 const struct pipe_shader_state
*state
,
712 const struct ilo_context
*precompile
)
714 struct ilo_shader_state
*shader
;
716 shader
= ilo_shader_state_create(precompile
, PIPE_SHADER_FRAGMENT
, state
);
718 /* states used in ilo_shader_variant_init() */
719 shader
->info
.non_orthogonal_states
= ILO_DIRTY_VIEW_FS
|
720 ILO_DIRTY_RASTERIZER
|
727 struct ilo_shader_state
*
728 ilo_shader_create_cs(const struct ilo_dev_info
*dev
,
729 const struct pipe_compute_state
*state
,
730 const struct ilo_context
*precompile
)
732 struct ilo_shader_state
*shader
;
734 shader
= ilo_shader_state_create(precompile
, PIPE_SHADER_COMPUTE
, state
);
736 shader
->info
.non_orthogonal_states
= 0;
742 * Destroy a shader state.
745 ilo_shader_destroy(struct ilo_shader_state
*shader
)
747 struct ilo_shader
*sh
, *next
;
749 LIST_FOR_EACH_ENTRY_SAFE(sh
, next
, &shader
->variants
, list
)
750 ilo_shader_destroy_kernel(sh
);
752 FREE((struct tgsi_token
*) shader
->info
.tokens
);
757 * Return the type (PIPE_SHADER_x) of the shader.
760 ilo_shader_get_type(const struct ilo_shader_state
*shader
)
762 return shader
->info
.type
;
766 * Select a kernel for the given context. This will compile a new kernel if
767 * none of the existing kernels work with the context.
769 * \param ilo the context
770 * \param dirty states of the context that are considered changed
771 * \return true if a different kernel is selected
774 ilo_shader_select_kernel(struct ilo_shader_state
*shader
,
775 const struct ilo_context
*ilo
,
778 const struct ilo_shader
* const cur
= shader
->shader
;
779 struct ilo_shader_variant variant
;
781 if (!(shader
->info
.non_orthogonal_states
& dirty
))
784 ilo_shader_variant_init(&variant
, &shader
->info
, ilo
);
785 ilo_shader_state_use_variant(shader
, &variant
);
787 return (shader
->shader
!= cur
);
791 route_attr(const int *semantics
, const int *indices
, int len
,
792 int semantic
, int index
)
796 for (i
= 0; i
< len
; i
++) {
797 if (semantics
[i
] == semantic
&& indices
[i
] == index
)
801 /* failed to match for COLOR, try BCOLOR */
802 if (semantic
== TGSI_SEMANTIC_COLOR
) {
803 for (i
= 0; i
< len
; i
++) {
804 if (semantics
[i
] == TGSI_SEMANTIC_BCOLOR
&& indices
[i
] == index
)
813 * Select a routing for the given source shader and rasterizer state.
815 * \return true if a different routing is selected
818 ilo_shader_select_kernel_routing(struct ilo_shader_state
*shader
,
819 const struct ilo_shader_state
*source
,
820 const struct ilo_rasterizer_state
*rasterizer
)
822 const uint32_t sprite_coord_enable
= rasterizer
->state
.sprite_coord_enable
;
823 const bool light_twoside
= rasterizer
->state
.light_twoside
;
824 struct ilo_shader
*kernel
= shader
->shader
;
825 struct ilo_kernel_routing
*routing
= &kernel
->routing
;
826 const int *src_semantics
, *src_indices
;
827 int src_len
, max_src_slot
;
828 int dst_len
, dst_slot
;
830 /* we are constructing 3DSTATE_SBE here */
831 assert(ilo_dev_gen(shader
->info
.dev
) >= ILO_GEN(6) &&
832 ilo_dev_gen(shader
->info
.dev
) <= ILO_GEN(7.5));
837 assert(source
->shader
);
838 src_semantics
= source
->shader
->out
.semantic_names
;
839 src_indices
= source
->shader
->out
.semantic_indices
;
840 src_len
= source
->shader
->out
.count
;
843 src_semantics
= kernel
->in
.semantic_names
;
844 src_indices
= kernel
->in
.semantic_indices
;
845 src_len
= kernel
->in
.count
;
849 if (kernel
->routing_initialized
&&
850 routing
->source_skip
+ routing
->source_len
<= src_len
&&
851 kernel
->routing_sprite_coord_enable
== sprite_coord_enable
&&
852 !memcmp(kernel
->routing_src_semantics
,
853 &src_semantics
[routing
->source_skip
],
854 sizeof(kernel
->routing_src_semantics
[0]) * routing
->source_len
) &&
855 !memcmp(kernel
->routing_src_indices
,
856 &src_indices
[routing
->source_skip
],
857 sizeof(kernel
->routing_src_indices
[0]) * routing
->source_len
))
861 /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
862 assert(src_semantics
[0] == TGSI_SEMANTIC_PSIZE
);
863 assert(src_semantics
[1] == TGSI_SEMANTIC_POSITION
);
864 routing
->source_skip
= 2;
866 routing
->source_len
= src_len
- routing
->source_skip
;
867 src_semantics
+= routing
->source_skip
;
868 src_indices
+= routing
->source_skip
;
871 routing
->source_skip
= 0;
872 routing
->source_len
= src_len
;
875 routing
->const_interp_enable
= kernel
->in
.const_interp_enable
;
876 routing
->point_sprite_enable
= 0;
877 routing
->swizzle_enable
= false;
879 assert(kernel
->in
.count
<= Elements(routing
->swizzles
));
880 dst_len
= MIN2(kernel
->in
.count
, Elements(routing
->swizzles
));
883 for (dst_slot
= 0; dst_slot
< dst_len
; dst_slot
++) {
884 const int semantic
= kernel
->in
.semantic_names
[dst_slot
];
885 const int index
= kernel
->in
.semantic_indices
[dst_slot
];
888 if (semantic
== TGSI_SEMANTIC_GENERIC
&&
889 (sprite_coord_enable
& (1 << index
)))
890 routing
->point_sprite_enable
|= 1 << dst_slot
;
893 src_slot
= route_attr(src_semantics
, src_indices
,
894 routing
->source_len
, semantic
, index
);
897 * The source shader stage does not output this attribute. The value
898 * is supposed to be undefined, unless the attribute goes through
899 * point sprite replacement or the attribute is
900 * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
901 * attribute is picked.
903 * We should update the kernel code and omit the output of
904 * TGSI_SEMANTIC_POSITION here.
913 routing
->swizzles
[dst_slot
] = src_slot
;
915 /* use the following slot for two-sided lighting */
916 if (semantic
== TGSI_SEMANTIC_COLOR
&& light_twoside
&&
917 src_slot
+ 1 < routing
->source_len
&&
918 src_semantics
[src_slot
+ 1] == TGSI_SEMANTIC_BCOLOR
&&
919 src_indices
[src_slot
+ 1] == index
) {
920 routing
->swizzles
[dst_slot
] |= GEN7_SBE_ATTR_INPUTATTR_FACING
;
924 if (routing
->swizzles
[dst_slot
] != dst_slot
)
925 routing
->swizzle_enable
= true;
927 if (max_src_slot
< src_slot
)
928 max_src_slot
= src_slot
;
931 memset(&routing
->swizzles
[dst_slot
], 0, sizeof(routing
->swizzles
) -
932 sizeof(routing
->swizzles
[0]) * dst_slot
);
935 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
937 * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
938 * 0 indicating no Vertex URB data to be read.
940 * This field should be set to the minimum length required to read the
941 * maximum source attribute. The maximum source attribute is indicated
942 * by the maximum value of the enabled Attribute # Source Attribute if
943 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
946 * read_length = ceiling((max_source_attr+1)/2)
948 * [errata] Corruption/Hang possible if length programmed larger than
951 routing
->source_len
= max_src_slot
+ 1;
953 /* remember the states of the source */
954 kernel
->routing_initialized
= true;
955 kernel
->routing_sprite_coord_enable
= sprite_coord_enable
;
956 memcpy(kernel
->routing_src_semantics
, src_semantics
,
957 sizeof(kernel
->routing_src_semantics
[0]) * routing
->source_len
);
958 memcpy(kernel
->routing_src_indices
, src_indices
,
959 sizeof(kernel
->routing_src_indices
[0]) * routing
->source_len
);
965 * Return the cache offset of the selected kernel. This must be called after
966 * ilo_shader_select_kernel() and ilo_shader_cache_upload().
969 ilo_shader_get_kernel_offset(const struct ilo_shader_state
*shader
)
971 const struct ilo_shader
*kernel
= shader
->shader
;
973 assert(kernel
&& kernel
->uploaded
);
975 return kernel
->cache_offset
;
979 * Query a kernel parameter for the selected kernel.
982 ilo_shader_get_kernel_param(const struct ilo_shader_state
*shader
,
983 enum ilo_kernel_param param
)
985 const struct ilo_shader
*kernel
= shader
->shader
;
991 case ILO_KERNEL_INPUT_COUNT
:
992 val
= kernel
->in
.count
;
994 case ILO_KERNEL_OUTPUT_COUNT
:
995 val
= kernel
->out
.count
;
997 case ILO_KERNEL_URB_DATA_START_REG
:
998 val
= kernel
->in
.start_grf
;
1000 case ILO_KERNEL_SKIP_CBUF0_UPLOAD
:
1001 val
= kernel
->skip_cbuf0_upload
;
1003 case ILO_KERNEL_PCB_CBUF0_SIZE
:
1004 val
= kernel
->pcb
.cbuf0_size
;
1007 case ILO_KERNEL_VS_INPUT_INSTANCEID
:
1008 val
= shader
->info
.has_instanceid
;
1010 case ILO_KERNEL_VS_INPUT_VERTEXID
:
1011 val
= shader
->info
.has_vertexid
;
1013 case ILO_KERNEL_VS_INPUT_EDGEFLAG
:
1014 if (shader
->info
.edgeflag_in
>= 0) {
1015 /* we rely on the state tracker here */
1016 assert(shader
->info
.edgeflag_in
== kernel
->in
.count
- 1);
1023 case ILO_KERNEL_VS_PCB_UCP_SIZE
:
1024 val
= kernel
->pcb
.clip_state_size
;
1026 case ILO_KERNEL_VS_GEN6_SO
:
1027 val
= kernel
->stream_output
;
1029 case ILO_KERNEL_VS_GEN6_SO_START_REG
:
1030 val
= kernel
->gs_start_grf
;
1032 case ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET
:
1033 val
= kernel
->gs_offsets
[0];
1035 case ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET
:
1036 val
= kernel
->gs_offsets
[1];
1038 case ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET
:
1039 val
= kernel
->gs_offsets
[2];
1042 case ILO_KERNEL_GS_DISCARD_ADJACENCY
:
1043 val
= kernel
->in
.discard_adj
;
1045 case ILO_KERNEL_GS_GEN6_SVBI_POST_INC
:
1046 val
= kernel
->svbi_post_inc
;
1049 case ILO_KERNEL_FS_INPUT_Z
:
1050 case ILO_KERNEL_FS_INPUT_W
:
1051 val
= kernel
->in
.has_pos
;
1053 case ILO_KERNEL_FS_OUTPUT_Z
:
1054 val
= kernel
->out
.has_pos
;
1056 case ILO_KERNEL_FS_USE_KILL
:
1057 val
= kernel
->has_kill
;
1059 case ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
:
1060 val
= kernel
->in
.barycentric_interpolation_mode
;
1062 case ILO_KERNEL_FS_DISPATCH_16_OFFSET
:
1067 assert(!"unknown kernel parameter");
1076 * Return the CSO of the selected kernel.
1078 const struct ilo_shader_cso
*
1079 ilo_shader_get_kernel_cso(const struct ilo_shader_state
*shader
)
1081 const struct ilo_shader
*kernel
= shader
->shader
;
1085 return &kernel
->cso
;
1089 * Return the SO info of the selected kernel.
1091 const struct pipe_stream_output_info
*
1092 ilo_shader_get_kernel_so_info(const struct ilo_shader_state
*shader
)
1094 const struct ilo_shader
*kernel
= shader
->shader
;
1098 return &kernel
->so_info
;
1102 * Return the routing info of the selected kernel.
1104 const struct ilo_kernel_routing
*
1105 ilo_shader_get_kernel_routing(const struct ilo_shader_state
*shader
)
1107 const struct ilo_shader
*kernel
= shader
->shader
;
1111 return &kernel
->routing
;