/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */
28 #include "genhw/genhw.h" /* for SBE setup */
29 #include "core/ilo_builder.h"
30 #include "core/intel_winsys.h"
31 #include "shader/ilo_shader_internal.h"
32 #include "tgsi/tgsi_parse.h"
34 #include "ilo_state.h"
35 #include "ilo_shader.h"
37 struct ilo_shader_cache
{
38 struct list_head shaders
;
39 struct list_head changed
;
43 * Create a shader cache. A shader cache can manage shaders and upload them
46 struct ilo_shader_cache
*
47 ilo_shader_cache_create(void)
49 struct ilo_shader_cache
*shc
;
51 shc
= CALLOC_STRUCT(ilo_shader_cache
);
55 list_inithead(&shc
->shaders
);
56 list_inithead(&shc
->changed
);
/**
 * Destroy a shader cache.
 *
 * Only the cache object itself is released here.
 * NOTE(review): shader states still linked on the cache lists are not
 * touched -- confirm callers remove them first.
 */
void
ilo_shader_cache_destroy(struct ilo_shader_cache *shc)
{
   FREE(shc);
}
71 * Add a shader to the cache.
74 ilo_shader_cache_add(struct ilo_shader_cache
*shc
,
75 struct ilo_shader_state
*shader
)
77 struct ilo_shader
*sh
;
80 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
)
83 list_add(&shader
->list
, &shc
->changed
);
87 * Remove a shader from the cache.
90 ilo_shader_cache_remove(struct ilo_shader_cache
*shc
,
91 struct ilo_shader_state
*shader
)
93 list_del(&shader
->list
);
98 * Notify the cache that a managed shader has changed.
101 ilo_shader_cache_notify_change(struct ilo_shader_cache
*shc
,
102 struct ilo_shader_state
*shader
)
104 if (shader
->cache
== shc
) {
105 list_del(&shader
->list
);
106 list_add(&shader
->list
, &shc
->changed
);
111 * Upload managed shaders to the bo. Only shaders that are changed or added
112 * after the last upload are uploaded.
115 ilo_shader_cache_upload(struct ilo_shader_cache
*shc
,
116 struct ilo_builder
*builder
)
118 struct ilo_shader_state
*shader
, *next
;
120 LIST_FOR_EACH_ENTRY_SAFE(shader
, next
, &shc
->changed
, list
) {
121 struct ilo_shader
*sh
;
123 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
) {
127 sh
->cache_offset
= ilo_builder_instruction_write(builder
,
128 sh
->kernel_size
, sh
->kernel
);
133 list_del(&shader
->list
);
134 list_add(&shader
->list
, &shc
->shaders
);
139 * Invalidate all shaders so that they get uploaded in next
140 * ilo_shader_cache_upload().
143 ilo_shader_cache_invalidate(struct ilo_shader_cache
*shc
)
145 struct ilo_shader_state
*shader
, *next
;
147 LIST_FOR_EACH_ENTRY_SAFE(shader
, next
, &shc
->shaders
, list
) {
148 list_del(&shader
->list
);
149 list_add(&shader
->list
, &shc
->changed
);
152 LIST_FOR_EACH_ENTRY(shader
, &shc
->changed
, list
) {
153 struct ilo_shader
*sh
;
155 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
)
156 sh
->uploaded
= false;
161 * Initialize a shader variant.
164 ilo_shader_variant_init(struct ilo_shader_variant
*variant
,
165 const struct ilo_shader_info
*info
,
166 const struct ilo_state_vector
*vec
)
170 memset(variant
, 0, sizeof(*variant
));
172 switch (info
->type
) {
173 case PIPE_SHADER_VERTEX
:
174 variant
->u
.vs
.rasterizer_discard
=
175 vec
->rasterizer
->state
.rasterizer_discard
;
176 variant
->u
.vs
.num_ucps
=
177 util_last_bit(vec
->rasterizer
->state
.clip_plane_enable
);
179 case PIPE_SHADER_GEOMETRY
:
180 variant
->u
.gs
.rasterizer_discard
=
181 vec
->rasterizer
->state
.rasterizer_discard
;
182 variant
->u
.gs
.num_inputs
= vec
->vs
->shader
->out
.count
;
183 for (i
= 0; i
< vec
->vs
->shader
->out
.count
; i
++) {
184 variant
->u
.gs
.semantic_names
[i
] =
185 vec
->vs
->shader
->out
.semantic_names
[i
];
186 variant
->u
.gs
.semantic_indices
[i
] =
187 vec
->vs
->shader
->out
.semantic_indices
[i
];
190 case PIPE_SHADER_FRAGMENT
:
191 variant
->u
.fs
.flatshade
=
192 (info
->has_color_interp
&& vec
->rasterizer
->state
.flatshade
);
193 variant
->u
.fs
.fb_height
= (info
->has_pos
) ?
194 vec
->fb
.state
.height
: 1;
195 variant
->u
.fs
.num_cbufs
= vec
->fb
.state
.nr_cbufs
;
198 assert(!"unknown shader type");
202 /* use PCB unless constant buffer 0 is not in user buffer */
203 if ((vec
->cbuf
[info
->type
].enabled_mask
& 0x1) &&
204 !vec
->cbuf
[info
->type
].cso
[0].user_buffer
)
205 variant
->use_pcb
= false;
207 variant
->use_pcb
= true;
209 num_views
= vec
->view
[info
->type
].count
;
210 assert(info
->num_samplers
<= num_views
);
212 variant
->num_sampler_views
= info
->num_samplers
;
213 for (i
= 0; i
< info
->num_samplers
; i
++) {
214 const struct pipe_sampler_view
*view
= vec
->view
[info
->type
].states
[i
];
215 const struct ilo_sampler_cso
*sampler
= vec
->sampler
[info
->type
].cso
[i
];
218 variant
->sampler_view_swizzles
[i
].r
= view
->swizzle_r
;
219 variant
->sampler_view_swizzles
[i
].g
= view
->swizzle_g
;
220 variant
->sampler_view_swizzles
[i
].b
= view
->swizzle_b
;
221 variant
->sampler_view_swizzles
[i
].a
= view
->swizzle_a
;
223 else if (info
->shadow_samplers
& (1 << i
)) {
224 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
225 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_RED
;
226 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_RED
;
227 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ONE
;
230 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
231 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_GREEN
;
232 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_BLUE
;
233 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ALPHA
;
237 * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
238 * the HW wrap mode is set to GEN6_TEXCOORDMODE_CLAMP_BORDER, and we
239 * need to manually saturate the texture coordinates.
242 variant
->saturate_tex_coords
[0] |= sampler
->saturate_s
<< i
;
243 variant
->saturate_tex_coords
[1] |= sampler
->saturate_t
<< i
;
244 variant
->saturate_tex_coords
[2] |= sampler
->saturate_r
<< i
;
250 * Guess the shader variant, knowing that the context may still change.
253 ilo_shader_variant_guess(struct ilo_shader_variant
*variant
,
254 const struct ilo_shader_info
*info
,
255 const struct ilo_state_vector
*vec
)
259 memset(variant
, 0, sizeof(*variant
));
261 switch (info
->type
) {
262 case PIPE_SHADER_VERTEX
:
264 case PIPE_SHADER_GEOMETRY
:
266 case PIPE_SHADER_FRAGMENT
:
267 variant
->u
.fs
.flatshade
= false;
268 variant
->u
.fs
.fb_height
= (info
->has_pos
) ?
269 vec
->fb
.state
.height
: 1;
270 variant
->u
.fs
.num_cbufs
= 1;
273 assert(!"unknown shader type");
277 variant
->use_pcb
= true;
279 variant
->num_sampler_views
= info
->num_samplers
;
280 for (i
= 0; i
< info
->num_samplers
; i
++) {
281 if (info
->shadow_samplers
& (1 << i
)) {
282 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
283 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_RED
;
284 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_RED
;
285 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ONE
;
288 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
289 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_GREEN
;
290 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_BLUE
;
291 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ALPHA
;
298 * Parse a TGSI instruction for the shader info.
301 ilo_shader_info_parse_inst(struct ilo_shader_info
*info
,
302 const struct tgsi_full_instruction
*inst
)
306 /* look for edgeflag passthrough */
307 if (info
->edgeflag_out
>= 0 &&
308 inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
309 inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
&&
310 inst
->Dst
[0].Register
.Index
== info
->edgeflag_out
) {
312 assert(inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
);
313 info
->edgeflag_in
= inst
->Src
[0].Register
.Index
;
316 if (inst
->Instruction
.Texture
) {
319 switch (inst
->Texture
.Texture
) {
320 case TGSI_TEXTURE_SHADOW1D
:
321 case TGSI_TEXTURE_SHADOW2D
:
322 case TGSI_TEXTURE_SHADOWRECT
:
323 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
324 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
325 case TGSI_TEXTURE_SHADOWCUBE
:
326 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
334 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
335 const struct tgsi_full_src_register
*src
= &inst
->Src
[i
];
337 if (src
->Register
.File
== TGSI_FILE_SAMPLER
) {
338 const int idx
= src
->Register
.Index
;
340 if (idx
>= info
->num_samplers
)
341 info
->num_samplers
= idx
+ 1;
344 info
->shadow_samplers
|= 1 << idx
;
351 * Parse a TGSI property for the shader info.
354 ilo_shader_info_parse_prop(struct ilo_shader_info
*info
,
355 const struct tgsi_full_property
*prop
)
357 switch (prop
->Property
.PropertyName
) {
358 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
:
359 info
->fs_color0_writes_all_cbufs
= prop
->u
[0].Data
;
367 * Parse a TGSI declaration for the shader info.
370 ilo_shader_info_parse_decl(struct ilo_shader_info
*info
,
371 const struct tgsi_full_declaration
*decl
)
373 switch (decl
->Declaration
.File
) {
374 case TGSI_FILE_INPUT
:
375 if (decl
->Declaration
.Interpolate
&&
376 decl
->Interp
.Interpolate
== TGSI_INTERPOLATE_COLOR
)
377 info
->has_color_interp
= true;
378 if (decl
->Declaration
.Semantic
&&
379 decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
)
380 info
->has_pos
= true;
382 case TGSI_FILE_OUTPUT
:
383 if (decl
->Declaration
.Semantic
&&
384 decl
->Semantic
.Name
== TGSI_SEMANTIC_EDGEFLAG
)
385 info
->edgeflag_out
= decl
->Range
.First
;
387 case TGSI_FILE_CONSTANT
:
389 const int idx
= (decl
->Declaration
.Dimension
) ?
390 decl
->Dim
.Index2D
: 0;
391 if (info
->constant_buffer_count
<= idx
)
392 info
->constant_buffer_count
= idx
+ 1;
395 case TGSI_FILE_SYSTEM_VALUE
:
396 if (decl
->Declaration
.Semantic
&&
397 decl
->Semantic
.Name
== TGSI_SEMANTIC_INSTANCEID
)
398 info
->has_instanceid
= true;
399 if (decl
->Declaration
.Semantic
&&
400 decl
->Semantic
.Name
== TGSI_SEMANTIC_VERTEXID
)
401 info
->has_vertexid
= true;
409 ilo_shader_info_parse_tokens(struct ilo_shader_info
*info
)
411 struct tgsi_parse_context parse
;
413 info
->edgeflag_in
= -1;
414 info
->edgeflag_out
= -1;
416 tgsi_parse_init(&parse
, info
->tokens
);
417 while (!tgsi_parse_end_of_tokens(&parse
)) {
418 const union tgsi_full_token
*token
;
420 tgsi_parse_token(&parse
);
421 token
= &parse
.FullToken
;
423 switch (token
->Token
.Type
) {
424 case TGSI_TOKEN_TYPE_DECLARATION
:
425 ilo_shader_info_parse_decl(info
, &token
->FullDeclaration
);
427 case TGSI_TOKEN_TYPE_INSTRUCTION
:
428 ilo_shader_info_parse_inst(info
, &token
->FullInstruction
);
430 case TGSI_TOKEN_TYPE_PROPERTY
:
431 ilo_shader_info_parse_prop(info
, &token
->FullProperty
);
437 tgsi_parse_free(&parse
);
441 * Create a shader state.
443 static struct ilo_shader_state
*
444 ilo_shader_state_create(const struct ilo_dev
*dev
,
445 const struct ilo_state_vector
*vec
,
446 int type
, const void *templ
)
448 struct ilo_shader_state
*state
;
449 struct ilo_shader_variant variant
;
451 state
= CALLOC_STRUCT(ilo_shader_state
);
455 state
->info
.dev
= dev
;
456 state
->info
.type
= type
;
458 if (type
== PIPE_SHADER_COMPUTE
) {
459 const struct pipe_compute_state
*c
=
460 (const struct pipe_compute_state
*) templ
;
462 state
->info
.tokens
= tgsi_dup_tokens(c
->prog
);
463 state
->info
.compute
.req_local_mem
= c
->req_local_mem
;
464 state
->info
.compute
.req_private_mem
= c
->req_private_mem
;
465 state
->info
.compute
.req_input_mem
= c
->req_input_mem
;
468 const struct pipe_shader_state
*s
=
469 (const struct pipe_shader_state
*) templ
;
471 state
->info
.tokens
= tgsi_dup_tokens(s
->tokens
);
472 state
->info
.stream_output
= s
->stream_output
;
475 list_inithead(&state
->variants
);
477 ilo_shader_info_parse_tokens(&state
->info
);
479 /* guess and compile now */
480 ilo_shader_variant_guess(&variant
, &state
->info
, vec
);
481 if (!ilo_shader_state_use_variant(state
, &variant
)) {
482 ilo_shader_destroy(state
);
490 * Add a compiled shader to the shader state.
493 ilo_shader_state_add_shader(struct ilo_shader_state
*state
,
494 struct ilo_shader
*sh
)
496 list_add(&sh
->list
, &state
->variants
);
497 state
->num_variants
++;
498 state
->total_size
+= sh
->kernel_size
;
501 ilo_shader_cache_notify_change(state
->cache
, state
);
505 * Remove a compiled shader from the shader state.
508 ilo_shader_state_remove_shader(struct ilo_shader_state
*state
,
509 struct ilo_shader
*sh
)
512 state
->num_variants
--;
513 state
->total_size
-= sh
->kernel_size
;
517 * Garbage collect shader variants in the shader state.
520 ilo_shader_state_gc(struct ilo_shader_state
*state
)
522 /* activate when the variants take up more than 4KiB of space */
523 const int limit
= 4 * 1024;
524 struct ilo_shader
*sh
, *next
;
526 if (state
->total_size
< limit
)
529 /* remove from the tail as the most recently ones are at the head */
530 LIST_FOR_EACH_ENTRY_SAFE_REV(sh
, next
, &state
->variants
, list
) {
531 ilo_shader_state_remove_shader(state
, sh
);
532 ilo_shader_destroy_kernel(sh
);
534 if (state
->total_size
<= limit
/ 2)
540 * Search for a shader variant.
542 static struct ilo_shader
*
543 ilo_shader_state_search_variant(struct ilo_shader_state
*state
,
544 const struct ilo_shader_variant
*variant
)
546 struct ilo_shader
*sh
= NULL
, *tmp
;
548 LIST_FOR_EACH_ENTRY(tmp
, &state
->variants
, list
) {
549 if (memcmp(&tmp
->variant
, variant
, sizeof(*variant
)) == 0) {
559 init_shader_urb(const struct ilo_shader
*kernel
,
560 const struct ilo_shader_state
*state
,
561 struct ilo_state_shader_urb_info
*urb
)
563 urb
->cv_input_attr_count
= kernel
->in
.count
;
565 urb
->read_count
= kernel
->in
.count
;
567 urb
->output_attr_count
= kernel
->out
.count
;
568 urb
->user_cull_enables
= 0x0;
569 urb
->user_clip_enables
= 0x0;
573 init_shader_kernel(const struct ilo_shader
*kernel
,
574 const struct ilo_shader_state
*state
,
575 struct ilo_state_shader_kernel_info
*kern
)
578 kern
->grf_start
= kernel
->in
.start_grf
;
579 kern
->pcb_attr_count
=
580 (kernel
->pcb
.cbuf0_size
+ kernel
->pcb
.clip_state_size
+ 15) / 16;
581 kern
->scratch_size
= 0;
585 init_shader_resource(const struct ilo_shader
*kernel
,
586 const struct ilo_shader_state
*state
,
587 struct ilo_state_shader_resource_info
*resource
)
589 resource
->sampler_count
= state
->info
.num_samplers
;
590 resource
->surface_count
= 0;
591 resource
->has_uav
= false;
595 init_vs(struct ilo_shader
*kernel
,
596 const struct ilo_shader_state
*state
)
598 struct ilo_state_vs_info info
;
600 memset(&info
, 0, sizeof(info
));
602 init_shader_urb(kernel
, state
, &info
.urb
);
603 init_shader_kernel(kernel
, state
, &info
.kernel
);
604 init_shader_resource(kernel
, state
, &info
.resource
);
605 info
.dispatch_enable
= true;
606 info
.stats_enable
= true;
608 if (ilo_dev_gen(state
->info
.dev
) == ILO_GEN(6) && kernel
->stream_output
) {
609 struct ilo_state_gs_info gs_info
;
611 memset(&gs_info
, 0, sizeof(gs_info
));
613 gs_info
.urb
.cv_input_attr_count
= kernel
->out
.count
;
614 gs_info
.urb
.read_count
= kernel
->out
.count
;
615 gs_info
.kernel
.grf_start
= kernel
->gs_start_grf
;
616 gs_info
.sol
.sol_enable
= true;
617 gs_info
.sol
.stats_enable
= true;
618 gs_info
.sol
.render_disable
= kernel
->variant
.u
.vs
.rasterizer_discard
;
619 gs_info
.sol
.svbi_post_inc
= kernel
->svbi_post_inc
;
620 gs_info
.sol
.tristrip_reorder
= GEN7_REORDER_LEADING
;
621 gs_info
.dispatch_enable
= true;
622 gs_info
.stats_enable
= true;
624 ilo_state_vs_init(&kernel
->cso
.vs_sol
.vs
, state
->info
.dev
, &info
);
625 ilo_state_gs_init(&kernel
->cso
.vs_sol
.sol
, state
->info
.dev
, &gs_info
);
627 ilo_state_vs_init(&kernel
->cso
.vs
, state
->info
.dev
, &info
);
632 init_gs(struct ilo_shader
*kernel
,
633 const struct ilo_shader_state
*state
)
635 const struct pipe_stream_output_info
*so_info
= &state
->info
.stream_output
;
636 struct ilo_state_gs_info info
;
638 memset(&info
, 0, sizeof(info
));
640 init_shader_urb(kernel
, state
, &info
.urb
);
641 init_shader_kernel(kernel
, state
, &info
.kernel
);
642 init_shader_resource(kernel
, state
, &info
.resource
);
643 info
.dispatch_enable
= true;
644 info
.stats_enable
= true;
646 if (so_info
->num_outputs
> 0) {
647 info
.sol
.sol_enable
= true;
648 info
.sol
.stats_enable
= true;
649 info
.sol
.render_disable
= kernel
->variant
.u
.gs
.rasterizer_discard
;
650 info
.sol
.tristrip_reorder
= GEN7_REORDER_LEADING
;
653 ilo_state_gs_init(&kernel
->cso
.gs
, state
->info
.dev
, &info
);
657 init_ps(struct ilo_shader
*kernel
,
658 const struct ilo_shader_state
*state
)
660 struct ilo_state_ps_info info
;
662 memset(&info
, 0, sizeof(info
));
664 init_shader_kernel(kernel
, state
, &info
.kernel_8
);
665 init_shader_resource(kernel
, state
, &info
.resource
);
667 info
.io
.has_rt_write
= true;
668 info
.io
.posoffset
= GEN6_POSOFFSET_NONE
;
669 info
.io
.attr_count
= kernel
->in
.count
;
670 info
.io
.use_z
= kernel
->in
.has_pos
;
671 info
.io
.use_w
= kernel
->in
.has_pos
;
672 info
.io
.use_coverage_mask
= false;
673 info
.io
.pscdepth
= (kernel
->out
.has_pos
) ?
674 GEN7_PSCDEPTH_ON
: GEN7_PSCDEPTH_OFF
;
675 info
.io
.write_pixel_mask
= kernel
->has_kill
;
676 info
.io
.write_omask
= false;
678 info
.params
.sample_mask
= 0x1;
679 info
.params
.earlyz_control_psexec
= false;
680 info
.params
.alpha_may_kill
= false;
681 info
.params
.dual_source_blending
= false;
682 info
.params
.has_writeable_rt
= true;
684 info
.valid_kernels
= GEN6_PS_DISPATCH_8
;
687 * From the Sandy Bridge PRM, volume 2 part 1, page 284:
689 * "(MSDISPMODE_PERSAMPLE) This is the high-quality multisample mode
690 * where (over and above PERPIXEL mode) the PS is run for each covered
691 * sample. This mode is also used for "normal" non-multisample
692 * rendering (aka 1X), given Number of Multisamples is programmed to
695 info
.per_sample_dispatch
= true;
697 info
.rt_clear_enable
= false;
698 info
.rt_resolve_enable
= false;
699 info
.cv_per_sample_interp
= false;
700 info
.cv_has_earlyz_op
= false;
701 info
.sample_count_one
= true;
702 info
.cv_has_depth_buffer
= true;
704 ilo_state_ps_init(&kernel
->cso
.ps
, state
->info
.dev
, &info
);
706 /* remember current parameters */
707 kernel
->ps_params
= info
.params
;
711 init_sol(struct ilo_shader
*kernel
,
712 const struct ilo_dev
*dev
,
713 const struct pipe_stream_output_info
*so_info
,
714 bool rasterizer_discard
)
716 struct ilo_state_sol_decl_info decls
[4][PIPE_MAX_SO_OUTPUTS
];
717 unsigned buf_offsets
[PIPE_MAX_SO_BUFFERS
];
718 struct ilo_state_sol_info info
;
721 if (!so_info
->num_outputs
) {
722 ilo_state_sol_init_disabled(&kernel
->sol
, dev
, rasterizer_discard
);
726 memset(&info
, 0, sizeof(info
));
727 info
.data
= kernel
->sol_data
;
728 info
.data_size
= sizeof(kernel
->sol_data
);
729 info
.sol_enable
= true;
730 info
.stats_enable
= true;
731 info
.tristrip_reorder
= GEN7_REORDER_TRAILING
;
732 info
.render_disable
= rasterizer_discard
;
733 info
.render_stream
= 0;
735 for (i
= 0; i
< 4; i
++) {
736 info
.buffer_strides
[i
] = so_info
->stride
[i
] * 4;
738 info
.streams
[i
].cv_vue_attr_count
= kernel
->out
.count
;
739 info
.streams
[i
].decls
= decls
[i
];
742 memset(decls
, 0, sizeof(decls
));
743 memset(buf_offsets
, 0, sizeof(buf_offsets
));
744 for (i
= 0; i
< so_info
->num_outputs
; i
++) {
745 const unsigned stream
= so_info
->output
[i
].stream
;
746 const unsigned buffer
= so_info
->output
[i
].output_buffer
;
747 struct ilo_state_sol_decl_info
*decl
;
750 /* figure out which attribute is sourced */
751 for (attr
= 0; attr
< kernel
->out
.count
; attr
++) {
752 const int reg_idx
= kernel
->out
.register_indices
[attr
];
753 if (reg_idx
== so_info
->output
[i
].register_index
)
756 if (attr
>= kernel
->out
.count
) {
757 assert(!"stream output an undefined register");
761 if (info
.streams
[stream
].vue_read_count
< attr
+ 1)
762 info
.streams
[stream
].vue_read_count
= attr
+ 1;
764 /* pad with holes first */
765 while (buf_offsets
[buffer
] < so_info
->output
[i
].dst_offset
) {
768 num_dwords
= so_info
->output
[i
].dst_offset
- buf_offsets
[buffer
];
772 assert(info
.streams
[stream
].decl_count
< ARRAY_SIZE(decls
[stream
]));
773 decl
= &decls
[stream
][info
.streams
[stream
].decl_count
];
776 decl
->is_hole
= true;
777 decl
->component_base
= 0;
778 decl
->component_count
= num_dwords
;
779 decl
->buffer
= buffer
;
781 info
.streams
[stream
].decl_count
++;
782 buf_offsets
[buffer
] += num_dwords
;
784 assert(buf_offsets
[buffer
] == so_info
->output
[i
].dst_offset
);
786 assert(info
.streams
[stream
].decl_count
< ARRAY_SIZE(decls
[stream
]));
787 decl
= &decls
[stream
][info
.streams
[stream
].decl_count
];
790 decl
->is_hole
= false;
791 /* PSIZE is at W channel */
792 if (kernel
->out
.semantic_names
[attr
] == TGSI_SEMANTIC_PSIZE
) {
793 assert(so_info
->output
[i
].start_component
== 0);
794 assert(so_info
->output
[i
].num_components
== 1);
795 decl
->component_base
= 3;
796 decl
->component_count
= 1;
798 decl
->component_base
= so_info
->output
[i
].start_component
;
799 decl
->component_count
= so_info
->output
[i
].num_components
;
801 decl
->buffer
= buffer
;
803 info
.streams
[stream
].decl_count
++;
804 buf_offsets
[buffer
] += so_info
->output
[i
].num_components
;
807 ilo_state_sol_init(&kernel
->sol
, dev
, &info
);
811 * Add a shader variant to the shader state.
813 static struct ilo_shader
*
814 ilo_shader_state_add_variant(struct ilo_shader_state
*state
,
815 const struct ilo_shader_variant
*variant
)
817 bool rasterizer_discard
= false;
818 struct ilo_shader
*sh
;
820 switch (state
->info
.type
) {
821 case PIPE_SHADER_VERTEX
:
822 sh
= ilo_shader_compile_vs(state
, variant
);
823 rasterizer_discard
= variant
->u
.vs
.rasterizer_discard
;
825 case PIPE_SHADER_FRAGMENT
:
826 sh
= ilo_shader_compile_fs(state
, variant
);
828 case PIPE_SHADER_GEOMETRY
:
829 sh
= ilo_shader_compile_gs(state
, variant
);
830 rasterizer_discard
= variant
->u
.gs
.rasterizer_discard
;
832 case PIPE_SHADER_COMPUTE
:
833 sh
= ilo_shader_compile_cs(state
, variant
);
840 assert(!"failed to compile shader");
844 sh
->variant
= *variant
;
846 init_sol(sh
, state
->info
.dev
, &state
->info
.stream_output
,
849 ilo_shader_state_add_shader(state
, sh
);
855 * Update state->shader to point to a variant. If the variant does not exist,
856 * it will be added first.
859 ilo_shader_state_use_variant(struct ilo_shader_state
*state
,
860 const struct ilo_shader_variant
*variant
)
862 struct ilo_shader
*sh
;
863 bool construct_cso
= false;
865 sh
= ilo_shader_state_search_variant(state
, variant
);
867 ilo_shader_state_gc(state
);
869 sh
= ilo_shader_state_add_variant(state
, variant
);
873 construct_cso
= true;
877 if (state
->variants
.next
!= &sh
->list
) {
879 list_add(&sh
->list
, &state
->variants
);
885 switch (state
->info
.type
) {
886 case PIPE_SHADER_VERTEX
:
889 case PIPE_SHADER_GEOMETRY
:
892 case PIPE_SHADER_FRAGMENT
:
903 struct ilo_shader_state
*
904 ilo_shader_create_vs(const struct ilo_dev
*dev
,
905 const struct pipe_shader_state
*state
,
906 const struct ilo_state_vector
*precompile
)
908 struct ilo_shader_state
*shader
;
910 shader
= ilo_shader_state_create(dev
, precompile
,
911 PIPE_SHADER_VERTEX
, state
);
913 /* states used in ilo_shader_variant_init() */
914 shader
->info
.non_orthogonal_states
= ILO_DIRTY_VIEW_VS
|
915 ILO_DIRTY_RASTERIZER
|
921 struct ilo_shader_state
*
922 ilo_shader_create_gs(const struct ilo_dev
*dev
,
923 const struct pipe_shader_state
*state
,
924 const struct ilo_state_vector
*precompile
)
926 struct ilo_shader_state
*shader
;
928 shader
= ilo_shader_state_create(dev
, precompile
,
929 PIPE_SHADER_GEOMETRY
, state
);
931 /* states used in ilo_shader_variant_init() */
932 shader
->info
.non_orthogonal_states
= ILO_DIRTY_VIEW_GS
|
934 ILO_DIRTY_RASTERIZER
|
940 struct ilo_shader_state
*
941 ilo_shader_create_fs(const struct ilo_dev
*dev
,
942 const struct pipe_shader_state
*state
,
943 const struct ilo_state_vector
*precompile
)
945 struct ilo_shader_state
*shader
;
947 shader
= ilo_shader_state_create(dev
, precompile
,
948 PIPE_SHADER_FRAGMENT
, state
);
950 /* states used in ilo_shader_variant_init() */
951 shader
->info
.non_orthogonal_states
= ILO_DIRTY_VIEW_FS
|
952 ILO_DIRTY_RASTERIZER
|
959 struct ilo_shader_state
*
960 ilo_shader_create_cs(const struct ilo_dev
*dev
,
961 const struct pipe_compute_state
*state
,
962 const struct ilo_state_vector
*precompile
)
964 struct ilo_shader_state
*shader
;
966 shader
= ilo_shader_state_create(dev
, precompile
,
967 PIPE_SHADER_COMPUTE
, state
);
969 shader
->info
.non_orthogonal_states
= 0;
975 * Destroy a shader state.
978 ilo_shader_destroy(struct ilo_shader_state
*shader
)
980 struct ilo_shader
*sh
, *next
;
982 LIST_FOR_EACH_ENTRY_SAFE(sh
, next
, &shader
->variants
, list
)
983 ilo_shader_destroy_kernel(sh
);
985 FREE((struct tgsi_token
*) shader
->info
.tokens
);
990 * Return the type (PIPE_SHADER_x) of the shader.
993 ilo_shader_get_type(const struct ilo_shader_state
*shader
)
995 return shader
->info
.type
;
999 * Select a kernel for the given context. This will compile a new kernel if
1000 * none of the existing kernels work with the context.
1002 * \param ilo the context
1003 * \param dirty states of the context that are considered changed
1004 * \return true if a different kernel is selected
1007 ilo_shader_select_kernel(struct ilo_shader_state
*shader
,
1008 const struct ilo_state_vector
*vec
,
1011 struct ilo_shader_variant variant
;
1012 bool changed
= false;
1014 if (shader
->info
.non_orthogonal_states
& dirty
) {
1015 const struct ilo_shader
* const old
= shader
->shader
;
1017 ilo_shader_variant_init(&variant
, &shader
->info
, vec
);
1018 ilo_shader_state_use_variant(shader
, &variant
);
1019 changed
= (shader
->shader
!= old
);
1022 if (shader
->info
.type
== PIPE_SHADER_FRAGMENT
) {
1023 struct ilo_shader
*kernel
= shader
->shader
;
1025 if (kernel
->ps_params
.sample_mask
!= vec
->sample_mask
||
1026 kernel
->ps_params
.alpha_may_kill
!= vec
->blend
->alpha_may_kill
) {
1027 kernel
->ps_params
.sample_mask
= vec
->sample_mask
;
1028 kernel
->ps_params
.alpha_may_kill
= vec
->blend
->alpha_may_kill
;
1030 ilo_state_ps_set_params(&kernel
->cso
.ps
, shader
->info
.dev
,
1031 &kernel
->ps_params
);
1041 route_attr(const int *semantics
, const int *indices
, int len
,
1042 int semantic
, int index
)
1046 for (i
= 0; i
< len
; i
++) {
1047 if (semantics
[i
] == semantic
&& indices
[i
] == index
)
1051 /* failed to match for COLOR, try BCOLOR */
1052 if (semantic
== TGSI_SEMANTIC_COLOR
) {
1053 for (i
= 0; i
< len
; i
++) {
1054 if (semantics
[i
] == TGSI_SEMANTIC_BCOLOR
&& indices
[i
] == index
)
1063 * Select a routing for the given source shader and rasterizer state.
1065 * \return true if a different routing is selected
1068 ilo_shader_select_kernel_sbe(struct ilo_shader_state
*shader
,
1069 const struct ilo_shader_state
*source
,
1070 const struct ilo_rasterizer_state
*rasterizer
)
1072 const bool is_point
= true;
1073 const bool light_twoside
= rasterizer
->state
.light_twoside
;
1074 const uint32_t sprite_coord_enable
= rasterizer
->state
.sprite_coord_enable
;
1075 const int sprite_coord_mode
= rasterizer
->state
.sprite_coord_mode
;
1076 struct ilo_shader
*kernel
= shader
->shader
;
1077 struct ilo_kernel_routing
*routing
= &kernel
->routing
;
1078 struct ilo_state_sbe_swizzle_info swizzles
[ILO_STATE_SBE_MAX_SWIZZLE_COUNT
];
1079 struct ilo_state_sbe_info info
;
1080 const int *src_semantics
, *src_indices
;
1081 int src_skip
, src_len
, src_slot
;
1082 int dst_len
, dst_slot
;
1087 assert(source
->shader
);
1089 src_semantics
= source
->shader
->out
.semantic_names
;
1090 src_indices
= source
->shader
->out
.semantic_indices
;
1091 src_len
= source
->shader
->out
.count
;
1093 assert(src_len
>= 2 &&
1094 src_semantics
[0] == TGSI_SEMANTIC_PSIZE
&&
1095 src_semantics
[1] == TGSI_SEMANTIC_POSITION
);
1098 * skip PSIZE and POSITION (how about the optional CLIPDISTs?), unless
1099 * they are all the source shader has and FS needs to read some
1102 if (src_len
> 2 || !kernel
->in
.count
) {
1109 src_semantics
= kernel
->in
.semantic_names
;
1110 src_indices
= kernel
->in
.semantic_indices
;
1111 src_len
= kernel
->in
.count
;
1116 if (routing
->initialized
&&
1117 routing
->is_point
== is_point
&&
1118 routing
->light_twoside
== light_twoside
&&
1119 routing
->sprite_coord_enable
== sprite_coord_enable
&&
1120 routing
->sprite_coord_mode
== sprite_coord_mode
&&
1121 routing
->src_len
<= src_len
&&
1122 !memcmp(routing
->src_semantics
, src_semantics
,
1123 sizeof(src_semantics
[0]) * routing
->src_len
) &&
1124 !memcmp(routing
->src_indices
, src_indices
,
1125 sizeof(src_indices
[0]) * routing
->src_len
))
1128 routing
->is_point
= is_point
;
1129 routing
->light_twoside
= light_twoside
;
1130 routing
->sprite_coord_enable
= sprite_coord_enable
;
1131 routing
->sprite_coord_mode
= sprite_coord_mode
;
1133 assert(kernel
->in
.count
<= Elements(swizzles
));
1134 dst_len
= MIN2(kernel
->in
.count
, Elements(swizzles
));
1136 memset(&swizzles
, 0, sizeof(swizzles
));
1137 memset(&info
, 0, sizeof(info
));
1139 info
.attr_count
= dst_len
;
1140 info
.cv_vue_attr_count
= src_skip
+ src_len
;
1141 info
.vue_read_base
= src_skip
;
1142 info
.vue_read_count
= 0;
1143 info
.has_min_read_count
= true;
1144 info
.swizzle_enable
= false;
1145 info
.swizzle_16_31
= false;
1146 info
.swizzle_count
= 0;
1147 info
.swizzles
= swizzles
;
1148 info
.const_interp_enables
= kernel
->in
.const_interp_enable
;
1149 info
.point_sprite_enables
= 0x0;
1150 info
.point_sprite_origin_lower_left
=
1151 (sprite_coord_mode
== PIPE_SPRITE_COORD_LOWER_LEFT
);
1152 info
.cv_is_point
= is_point
;
1154 for (dst_slot
= 0; dst_slot
< dst_len
; dst_slot
++) {
1155 const int semantic
= kernel
->in
.semantic_names
[dst_slot
];
1156 const int index
= kernel
->in
.semantic_indices
[dst_slot
];
1158 if (semantic
== TGSI_SEMANTIC_GENERIC
&&
1159 (sprite_coord_enable
& (1 << index
)))
1160 info
.point_sprite_enables
|= 1 << dst_slot
;
1163 src_slot
= route_attr(src_semantics
, src_indices
, src_len
,
1167 * The source shader stage does not output this attribute. The value
1168 * is supposed to be undefined, unless the attribute goes through
1169 * point sprite replacement or the attribute is
1170 * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
1171 * attribute is picked.
1173 * We should update the kernel code and omit the output of
1174 * TGSI_SEMANTIC_POSITION here.
1179 src_slot
= dst_slot
;
1182 /* use the following slot for two-sided lighting */
1183 if (semantic
== TGSI_SEMANTIC_COLOR
&& light_twoside
&&
1184 src_slot
+ 1 < src_len
&&
1185 src_semantics
[src_slot
+ 1] == TGSI_SEMANTIC_BCOLOR
&&
1186 src_indices
[src_slot
+ 1] == index
) {
1187 swizzles
[dst_slot
].attr_select
= GEN6_INPUTATTR_FACING
;
1188 swizzles
[dst_slot
].attr
= src_slot
;
1189 info
.swizzle_enable
= true;
1192 swizzles
[dst_slot
].attr_select
= GEN6_INPUTATTR_NORMAL
;
1193 swizzles
[dst_slot
].attr
= src_slot
;
1194 if (src_slot
!= dst_slot
)
1195 info
.swizzle_enable
= true;
1198 swizzles
[dst_slot
].force_zeros
= false;
1200 if (info
.vue_read_count
< src_slot
+ 1)
1201 info
.vue_read_count
= src_slot
+ 1;
1204 if (info
.swizzle_enable
)
1205 info
.swizzle_count
= dst_len
;
1207 if (routing
->initialized
)
1208 ilo_state_sbe_set_info(&routing
->sbe
, shader
->info
.dev
, &info
);
1210 ilo_state_sbe_init(&routing
->sbe
, shader
->info
.dev
, &info
);
1212 routing
->src_len
= info
.vue_read_count
;
1213 memcpy(routing
->src_semantics
, src_semantics
,
1214 sizeof(src_semantics
[0]) * routing
->src_len
);
1215 memcpy(routing
->src_indices
, src_indices
,
1216 sizeof(src_indices
[0]) * routing
->src_len
);
1218 routing
->initialized
= true;
1224 * Return the cache offset of the selected kernel. This must be called after
1225 * ilo_shader_select_kernel() and ilo_shader_cache_upload().
/*
 * Report the cache offset of the currently selected kernel, i.e. the
 * cache_offset member of shader->shader.  The assert states the
 * precondition: a kernel must be selected and must be flagged as uploaded.
 *
 * NOTE(review): the return-type line of this definition is not present in
 * this copy of the file -- confirm it against the header before editing.
 */
1228 ilo_shader_get_kernel_offset(const struct ilo_shader_state
*shader
)
1230 const struct ilo_shader
*kernel
= shader
->shader
;
1232 assert(kernel
&& kernel
->uploaded
);
1234 return kernel
->cache_offset
;
1238 * Query a kernel parameter for the selected kernel.
/*
 * Query one parameter of the selected kernel.
 *
 * Each case label below picks the value for one enum ilo_kernel_param.  Most
 * cases read a member of the selected kernel (shader->shader); a few read
 * shader->info instead (sampler count, instance/vertex id flags, and the
 * compute memory requirements).
 *
 * The final assert(!"unknown kernel parameter") always fails when reached;
 * presumably it sits under a default label -- TODO confirm.
 *
 * NOTE(review): this copy of the file is missing lines (the embedded
 * original line numbers have gaps).  The return type, anything between the
 * kernel declaration and the first case label, whatever ends each case, and
 * everything after the final assert are not visible here.  Confirm against
 * the upstream file before changing behaviour.
 */
1241 ilo_shader_get_kernel_param(const struct ilo_shader_state
*shader
,
1242 enum ilo_kernel_param param
)
1244 const struct ilo_shader
*kernel
= shader
->shader
;
1250 case ILO_KERNEL_INPUT_COUNT
:
1251 val
= kernel
->in
.count
;
1253 case ILO_KERNEL_OUTPUT_COUNT
:
1254 val
= kernel
->out
.count
;
1256 case ILO_KERNEL_SAMPLER_COUNT
:
1257 val
= shader
->info
.num_samplers
;
1259 case ILO_KERNEL_URB_DATA_START_REG
:
1260 val
= kernel
->in
.start_grf
;
1262 case ILO_KERNEL_SKIP_CBUF0_UPLOAD
:
1263 val
= kernel
->skip_cbuf0_upload
;
1265 case ILO_KERNEL_PCB_CBUF0_SIZE
:
1266 val
= kernel
->pcb
.cbuf0_size
;
1269 case ILO_KERNEL_SURFACE_TOTAL_COUNT
:
1270 val
= kernel
->bt
.total_count
;
1272 case ILO_KERNEL_SURFACE_TEX_BASE
:
1273 val
= kernel
->bt
.tex_base
;
1275 case ILO_KERNEL_SURFACE_TEX_COUNT
:
1276 val
= kernel
->bt
.tex_count
;
1278 case ILO_KERNEL_SURFACE_CONST_BASE
:
1279 val
= kernel
->bt
.const_base
;
1281 case ILO_KERNEL_SURFACE_CONST_COUNT
:
1282 val
= kernel
->bt
.const_count
;
1284 case ILO_KERNEL_SURFACE_RES_BASE
:
1285 val
= kernel
->bt
.res_base
;
1287 case ILO_KERNEL_SURFACE_RES_COUNT
:
1288 val
= kernel
->bt
.res_count
;
1291 case ILO_KERNEL_VS_INPUT_INSTANCEID
:
1292 val
= shader
->info
.has_instanceid
;
1294 case ILO_KERNEL_VS_INPUT_VERTEXID
:
1295 val
= shader
->info
.has_vertexid
;
1297 case ILO_KERNEL_VS_INPUT_EDGEFLAG
:
1298 if (shader
->info
.edgeflag_in
>= 0) {
1299 /* we rely on the state tracker here */
1300 assert(shader
->info
.edgeflag_in
== kernel
->in
.count
- 1);
/*
 * NOTE(review): no statement that sets val for this parameter is visible;
 * original lines 1301-1306 are absent from this copy.
 */
1307 case ILO_KERNEL_VS_PCB_UCP_SIZE
:
1308 val
= kernel
->pcb
.clip_state_size
;
1310 case ILO_KERNEL_VS_GEN6_SO
:
1311 val
= kernel
->stream_output
;
1313 case ILO_KERNEL_VS_GEN6_SO_START_REG
:
1314 val
= kernel
->gs_start_grf
;
1316 case ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET
:
1317 val
= kernel
->gs_offsets
[0];
1319 case ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET
:
1320 val
= kernel
->gs_offsets
[1];
1322 case ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET
:
1323 val
= kernel
->gs_offsets
[2];
1325 case ILO_KERNEL_VS_GEN6_SO_SURFACE_COUNT
:
1326 val
= kernel
->gs_bt_so_count
;
1329 case ILO_KERNEL_GS_DISCARD_ADJACENCY
:
1330 val
= kernel
->in
.discard_adj
;
1332 case ILO_KERNEL_GS_GEN6_SVBI_POST_INC
:
1333 val
= kernel
->svbi_post_inc
;
1335 case ILO_KERNEL_GS_GEN6_SURFACE_SO_BASE
:
1336 val
= kernel
->bt
.gen6_so_base
;
1338 case ILO_KERNEL_GS_GEN6_SURFACE_SO_COUNT
:
1339 val
= kernel
->bt
.gen6_so_count
;
/* the Z and W input queries share one answer: kernel->in.has_pos */
1342 case ILO_KERNEL_FS_INPUT_Z
:
1343 case ILO_KERNEL_FS_INPUT_W
:
1344 val
= kernel
->in
.has_pos
;
1346 case ILO_KERNEL_FS_OUTPUT_Z
:
1347 val
= kernel
->out
.has_pos
;
1349 case ILO_KERNEL_FS_USE_KILL
:
1350 val
= kernel
->has_kill
;
1352 case ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
:
1353 val
= kernel
->in
.barycentric_interpolation_mode
;
1355 case ILO_KERNEL_FS_DISPATCH_16_OFFSET
:
/*
 * NOTE(review): the value assigned for this parameter is not visible;
 * original lines 1356-1357 are absent from this copy.
 */
1358 case ILO_KERNEL_FS_SURFACE_RT_BASE
:
1359 val
= kernel
->bt
.rt_base
;
1361 case ILO_KERNEL_FS_SURFACE_RT_COUNT
:
1362 val
= kernel
->bt
.rt_count
;
1365 case ILO_KERNEL_CS_LOCAL_SIZE
:
1366 val
= shader
->info
.compute
.req_local_mem
;
1368 case ILO_KERNEL_CS_PRIVATE_SIZE
:
1369 val
= shader
->info
.compute
.req_private_mem
;
1371 case ILO_KERNEL_CS_INPUT_SIZE
:
1372 val
= shader
->info
.compute
.req_input_mem
;
1374 case ILO_KERNEL_CS_SIMD_SIZE
:
/*
 * NOTE(review): the value assigned for this parameter is not visible;
 * original lines 1375-1376 are absent from this copy.
 */
1377 case ILO_KERNEL_CS_SURFACE_GLOBAL_BASE
:
1378 val
= kernel
->bt
.global_base
;
1380 case ILO_KERNEL_CS_SURFACE_GLOBAL_COUNT
:
1381 val
= kernel
->bt
.global_count
;
1385 assert(!"unknown kernel parameter");
1394 * Return the CSO of the selected kernel.
/*
 * Accessor for the CSO of the selected kernel: yields the address of the
 * cso member embedded in shader->shader.  Nothing is copied.
 *
 * NOTE(review): the embedded line numbers jump from 1399 to 1403, so a
 * statement between the declaration and the return may be missing from this
 * copy -- check the upstream file before relying on this text.
 */
1396 const union ilo_shader_cso
*
1397 ilo_shader_get_kernel_cso(const struct ilo_shader_state
*shader
)
1399 const struct ilo_shader
*kernel
= shader
->shader
;
1403 return &kernel
->cso
;
1407 * Return the SO info of the selected kernel.
1409 const struct pipe_stream_output_info
*
1410 ilo_shader_get_kernel_so_info(const struct ilo_shader_state
*shader
)
1412 return &shader
->info
.stream_output
;
/*
 * Accessor for the ilo_state_sol of the selected kernel: yields the address
 * of the sol member embedded in shader->shader.  Nothing is copied.
 *
 * NOTE(review): the embedded line numbers jump from 1418 to 1422, so a
 * statement between the declaration and the return may be missing from this
 * copy -- check the upstream file before relying on this text.
 */
1415 const struct ilo_state_sol
*
1416 ilo_shader_get_kernel_sol(const struct ilo_shader_state
*shader
)
1418 const struct ilo_shader
*kernel
= shader
->shader
;
1422 return &kernel
->sol
;
1426 * Return the routing info of the selected kernel.
/*
 * Accessor for the routing (SBE) state of the selected kernel: yields the
 * address of routing.sbe inside shader->shader.  Nothing is copied.
 *
 * NOTE(review): the embedded line numbers jump from 1431 to 1435, so a
 * statement between the declaration and the return may be missing from this
 * copy -- check the upstream file before relying on this text.
 */
1428 const struct ilo_state_sbe
*
1429 ilo_shader_get_kernel_sbe(const struct ilo_shader_state
*shader
)
1431 const struct ilo_shader
*kernel
= shader
->shader
;
1435 return &kernel
->routing
.sbe
;