2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "tgsi/tgsi_parse.h"
29 #include "intel_winsys.h"
31 #include "ilo_shader.h"
33 struct ilo_shader_cache
{
34 struct list_head shaders
;
35 struct list_head changed
;
39 * Create a shader cache. A shader cache can manage shaders and upload them
42 struct ilo_shader_cache
*
43 ilo_shader_cache_create(void)
45 struct ilo_shader_cache
*shc
;
47 shc
= CALLOC_STRUCT(ilo_shader_cache
);
51 list_inithead(&shc
->shaders
);
52 list_inithead(&shc
->changed
);
58 * Destroy a shader cache.
61 ilo_shader_cache_destroy(struct ilo_shader_cache
*shc
)
67 * Add a shader to the cache.
70 ilo_shader_cache_add(struct ilo_shader_cache
*shc
,
71 struct ilo_shader_state
*shader
)
73 struct ilo_shader
*sh
;
76 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
)
79 list_add(&shader
->list
, &shc
->changed
);
83 * Remove a shader from the cache.
86 ilo_shader_cache_remove(struct ilo_shader_cache
*shc
,
87 struct ilo_shader_state
*shader
)
89 list_del(&shader
->list
);
94 * Notify the cache that a managed shader has changed.
97 ilo_shader_cache_notify_change(struct ilo_shader_cache
*shc
,
98 struct ilo_shader_state
*shader
)
100 if (shader
->cache
== shc
) {
101 list_del(&shader
->list
);
102 list_add(&shader
->list
, &shc
->changed
);
107 * Upload a managed shader to the bo.
110 ilo_shader_cache_upload_shader(struct ilo_shader_cache
*shc
,
111 struct ilo_shader_state
*shader
,
112 struct intel_bo
*bo
, unsigned offset
,
115 const unsigned base
= offset
;
116 struct ilo_shader
*sh
;
118 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
) {
121 if (incremental
&& sh
->uploaded
)
124 /* kernels must be aligned to 64-byte */
125 offset
= align(offset
, 64);
127 err
= intel_bo_pwrite(bo
, offset
, sh
->kernel_size
, sh
->kernel
);
132 sh
->cache_offset
= offset
;
134 offset
+= sh
->kernel_size
;
137 return (int) (offset
- base
);
141 * Similar to ilo_shader_cache_upload(), except no upload happens.
144 ilo_shader_cache_get_upload_size(struct ilo_shader_cache
*shc
,
148 const unsigned base
= offset
;
149 struct ilo_shader_state
*shader
;
152 LIST_FOR_EACH_ENTRY(shader
, &shc
->shaders
, list
) {
153 struct ilo_shader
*sh
;
155 /* see ilo_shader_cache_upload_shader() */
156 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
) {
157 if (!incremental
|| !sh
->uploaded
)
158 offset
= align(offset
, 64) + sh
->kernel_size
;
163 LIST_FOR_EACH_ENTRY(shader
, &shc
->changed
, list
) {
164 struct ilo_shader
*sh
;
166 /* see ilo_shader_cache_upload_shader() */
167 LIST_FOR_EACH_ENTRY(sh
, &shader
->variants
, list
) {
168 if (!incremental
|| !sh
->uploaded
)
169 offset
= align(offset
, 64) + sh
->kernel_size
;
174 * From the Sandy Bridge PRM, volume 4 part 2, page 112:
176 * "Due to prefetch of the instruction stream, the EUs may attempt to
177 * access up to 8 instructions (128 bytes) beyond the end of the
178 * kernel program - possibly into the next memory page. Although
179 * these instructions will not be executed, software must account for
180 * the prefetch in order to avoid invalid page access faults."
185 return (int) (offset
- base
);
189 * Upload managed shaders to the bo. When incremental is true, only shaders
190 * that are changed or added after the last upload are uploaded.
193 ilo_shader_cache_upload(struct ilo_shader_cache
*shc
,
194 struct intel_bo
*bo
, unsigned offset
,
197 struct ilo_shader_state
*shader
, *next
;
201 return ilo_shader_cache_get_upload_size(shc
, offset
, incremental
);
204 LIST_FOR_EACH_ENTRY(shader
, &shc
->shaders
, list
) {
205 s
= ilo_shader_cache_upload_shader(shc
, shader
,
206 bo
, offset
, incremental
);
215 LIST_FOR_EACH_ENTRY_SAFE(shader
, next
, &shc
->changed
, list
) {
216 s
= ilo_shader_cache_upload_shader(shc
, shader
,
217 bo
, offset
, incremental
);
224 list_del(&shader
->list
);
225 list_add(&shader
->list
, &shc
->shaders
);
232 * Initialize a shader variant.
235 ilo_shader_variant_init(struct ilo_shader_variant
*variant
,
236 const struct ilo_shader_info
*info
,
237 const struct ilo_context
*ilo
)
241 memset(variant
, 0, sizeof(*variant
));
243 switch (info
->type
) {
244 case PIPE_SHADER_VERTEX
:
245 variant
->u
.vs
.rasterizer_discard
=
246 ilo
->rasterizer
->state
.rasterizer_discard
;
247 variant
->u
.vs
.num_ucps
=
248 util_last_bit(ilo
->rasterizer
->state
.clip_plane_enable
);
250 case PIPE_SHADER_GEOMETRY
:
251 variant
->u
.gs
.rasterizer_discard
=
252 ilo
->rasterizer
->state
.rasterizer_discard
;
253 variant
->u
.gs
.num_inputs
= ilo
->vs
->shader
->out
.count
;
254 for (i
= 0; i
< ilo
->vs
->shader
->out
.count
; i
++) {
255 variant
->u
.gs
.semantic_names
[i
] =
256 ilo
->vs
->shader
->out
.semantic_names
[i
];
257 variant
->u
.gs
.semantic_indices
[i
] =
258 ilo
->vs
->shader
->out
.semantic_indices
[i
];
261 case PIPE_SHADER_FRAGMENT
:
262 variant
->u
.fs
.flatshade
=
263 (info
->has_color_interp
&& ilo
->rasterizer
->state
.flatshade
);
264 variant
->u
.fs
.fb_height
= (info
->has_pos
) ?
265 ilo
->fb
.state
.height
: 1;
266 variant
->u
.fs
.num_cbufs
= ilo
->fb
.state
.nr_cbufs
;
269 assert(!"unknown shader type");
273 num_views
= ilo
->view
[info
->type
].count
;
274 assert(info
->num_samplers
<= num_views
);
276 variant
->num_sampler_views
= info
->num_samplers
;
277 for (i
= 0; i
< info
->num_samplers
; i
++) {
278 const struct pipe_sampler_view
*view
= ilo
->view
[info
->type
].states
[i
];
279 const struct ilo_sampler_cso
*sampler
= ilo
->sampler
[info
->type
].cso
[i
];
282 variant
->sampler_view_swizzles
[i
].r
= view
->swizzle_r
;
283 variant
->sampler_view_swizzles
[i
].g
= view
->swizzle_g
;
284 variant
->sampler_view_swizzles
[i
].b
= view
->swizzle_b
;
285 variant
->sampler_view_swizzles
[i
].a
= view
->swizzle_a
;
287 else if (info
->shadow_samplers
& (1 << i
)) {
288 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
289 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_RED
;
290 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_RED
;
291 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ONE
;
294 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
295 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_GREEN
;
296 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_BLUE
;
297 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ALPHA
;
301 * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
302 * the HW wrap mode is set to BRW_TEXCOORDMODE_CLAMP_BORDER, and we need
303 * to manually saturate the texture coordinates.
306 variant
->saturate_tex_coords
[0] |= sampler
->saturate_s
<< i
;
307 variant
->saturate_tex_coords
[1] |= sampler
->saturate_t
<< i
;
308 variant
->saturate_tex_coords
[2] |= sampler
->saturate_r
<< i
;
314 * Guess the shader variant, knowing that the context may still change.
317 ilo_shader_variant_guess(struct ilo_shader_variant
*variant
,
318 const struct ilo_shader_info
*info
,
319 const struct ilo_context
*ilo
)
323 memset(variant
, 0, sizeof(*variant
));
325 switch (info
->type
) {
326 case PIPE_SHADER_VERTEX
:
328 case PIPE_SHADER_GEOMETRY
:
330 case PIPE_SHADER_FRAGMENT
:
331 variant
->u
.fs
.flatshade
= false;
332 variant
->u
.fs
.fb_height
= (info
->has_pos
) ?
333 ilo
->fb
.state
.height
: 1;
334 variant
->u
.fs
.num_cbufs
= 1;
337 assert(!"unknown shader type");
341 variant
->num_sampler_views
= info
->num_samplers
;
342 for (i
= 0; i
< info
->num_samplers
; i
++) {
343 if (info
->shadow_samplers
& (1 << i
)) {
344 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
345 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_RED
;
346 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_RED
;
347 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ONE
;
350 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
351 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_GREEN
;
352 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_BLUE
;
353 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ALPHA
;
360 * Parse a TGSI instruction for the shader info.
363 ilo_shader_info_parse_inst(struct ilo_shader_info
*info
,
364 const struct tgsi_full_instruction
*inst
)
368 /* look for edgeflag passthrough */
369 if (info
->edgeflag_out
>= 0 &&
370 inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
371 inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
&&
372 inst
->Dst
[0].Register
.Index
== info
->edgeflag_out
) {
374 assert(inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
);
375 info
->edgeflag_in
= inst
->Src
[0].Register
.Index
;
378 if (inst
->Instruction
.Texture
) {
381 switch (inst
->Texture
.Texture
) {
382 case TGSI_TEXTURE_SHADOW1D
:
383 case TGSI_TEXTURE_SHADOW2D
:
384 case TGSI_TEXTURE_SHADOWRECT
:
385 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
386 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
387 case TGSI_TEXTURE_SHADOWCUBE
:
388 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
396 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
397 const struct tgsi_full_src_register
*src
= &inst
->Src
[i
];
399 if (src
->Register
.File
== TGSI_FILE_SAMPLER
) {
400 const int idx
= src
->Register
.Index
;
402 if (idx
>= info
->num_samplers
)
403 info
->num_samplers
= idx
+ 1;
406 info
->shadow_samplers
|= 1 << idx
;
413 * Parse a TGSI property for the shader info.
416 ilo_shader_info_parse_prop(struct ilo_shader_info
*info
,
417 const struct tgsi_full_property
*prop
)
419 switch (prop
->Property
.PropertyName
) {
420 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
:
421 info
->fs_color0_writes_all_cbufs
= prop
->u
[0].Data
;
429 * Parse a TGSI declaration for the shader info.
432 ilo_shader_info_parse_decl(struct ilo_shader_info
*info
,
433 const struct tgsi_full_declaration
*decl
)
435 switch (decl
->Declaration
.File
) {
436 case TGSI_FILE_INPUT
:
437 if (decl
->Declaration
.Interpolate
&&
438 decl
->Interp
.Interpolate
== TGSI_INTERPOLATE_COLOR
)
439 info
->has_color_interp
= true;
440 if (decl
->Declaration
.Semantic
&&
441 decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
)
442 info
->has_pos
= true;
444 case TGSI_FILE_OUTPUT
:
445 if (decl
->Declaration
.Semantic
&&
446 decl
->Semantic
.Name
== TGSI_SEMANTIC_EDGEFLAG
)
447 info
->edgeflag_out
= decl
->Range
.First
;
449 case TGSI_FILE_SYSTEM_VALUE
:
450 if (decl
->Declaration
.Semantic
&&
451 decl
->Semantic
.Name
== TGSI_SEMANTIC_INSTANCEID
)
452 info
->has_instanceid
= true;
453 if (decl
->Declaration
.Semantic
&&
454 decl
->Semantic
.Name
== TGSI_SEMANTIC_VERTEXID
)
455 info
->has_vertexid
= true;
463 ilo_shader_info_parse_tokens(struct ilo_shader_info
*info
)
465 struct tgsi_parse_context parse
;
467 info
->edgeflag_in
= -1;
468 info
->edgeflag_out
= -1;
470 tgsi_parse_init(&parse
, info
->tokens
);
471 while (!tgsi_parse_end_of_tokens(&parse
)) {
472 const union tgsi_full_token
*token
;
474 tgsi_parse_token(&parse
);
475 token
= &parse
.FullToken
;
477 switch (token
->Token
.Type
) {
478 case TGSI_TOKEN_TYPE_DECLARATION
:
479 ilo_shader_info_parse_decl(info
, &token
->FullDeclaration
);
481 case TGSI_TOKEN_TYPE_INSTRUCTION
:
482 ilo_shader_info_parse_inst(info
, &token
->FullInstruction
);
484 case TGSI_TOKEN_TYPE_PROPERTY
:
485 ilo_shader_info_parse_prop(info
, &token
->FullProperty
);
491 tgsi_parse_free(&parse
);
495 * Create a shader state.
497 struct ilo_shader_state
*
498 ilo_shader_state_create(const struct ilo_context
*ilo
,
499 int type
, const void *templ
)
501 struct ilo_shader_state
*state
;
502 struct ilo_shader_variant variant
;
504 state
= CALLOC_STRUCT(ilo_shader_state
);
508 state
->info
.dev
= ilo
->dev
;
509 state
->info
.type
= type
;
511 if (type
== PIPE_SHADER_COMPUTE
) {
512 const struct pipe_compute_state
*c
=
513 (const struct pipe_compute_state
*) templ
;
515 state
->info
.tokens
= tgsi_dup_tokens(c
->prog
);
516 state
->info
.compute
.req_local_mem
= c
->req_local_mem
;
517 state
->info
.compute
.req_private_mem
= c
->req_private_mem
;
518 state
->info
.compute
.req_input_mem
= c
->req_input_mem
;
521 const struct pipe_shader_state
*s
=
522 (const struct pipe_shader_state
*) templ
;
524 state
->info
.tokens
= tgsi_dup_tokens(s
->tokens
);
525 state
->info
.stream_output
= s
->stream_output
;
528 list_inithead(&state
->variants
);
530 ilo_shader_info_parse_tokens(&state
->info
);
532 /* guess and compile now */
533 ilo_shader_variant_guess(&variant
, &state
->info
, ilo
);
534 if (!ilo_shader_state_use_variant(state
, &variant
)) {
535 ilo_shader_state_destroy(state
);
543 * Destroy a shader state.
546 ilo_shader_state_destroy(struct ilo_shader_state
*state
)
548 struct ilo_shader
*sh
, *next
;
550 LIST_FOR_EACH_ENTRY_SAFE(sh
, next
, &state
->variants
, list
)
551 ilo_shader_destroy(sh
);
553 FREE((struct tgsi_token
*) state
->info
.tokens
);
558 * Add a compiled shader to the shader state.
561 ilo_shader_state_add_shader(struct ilo_shader_state
*state
,
562 struct ilo_shader
*sh
)
564 list_add(&sh
->list
, &state
->variants
);
565 state
->num_variants
++;
566 state
->total_size
+= sh
->kernel_size
;
569 ilo_shader_cache_notify_change(state
->cache
, state
);
573 * Remove a compiled shader from the shader state.
576 ilo_shader_state_remove_shader(struct ilo_shader_state
*state
,
577 struct ilo_shader
*sh
)
580 state
->num_variants
--;
581 state
->total_size
-= sh
->kernel_size
;
585 * Garbage collect shader variants in the shader state.
588 ilo_shader_state_gc(struct ilo_shader_state
*state
)
590 /* activate when the variants take up more than 4KiB of space */
591 const int limit
= 4 * 1024;
592 struct ilo_shader
*sh
, *next
;
594 if (state
->total_size
< limit
)
597 /* remove from the tail as the most recently ones are at the head */
598 LIST_FOR_EACH_ENTRY_SAFE_REV(sh
, next
, &state
->variants
, list
) {
599 ilo_shader_state_remove_shader(state
, sh
);
600 ilo_shader_destroy(sh
);
602 if (state
->total_size
<= limit
/ 2)
608 * Search for a shader variant.
610 static struct ilo_shader
*
611 ilo_shader_state_search_variant(struct ilo_shader_state
*state
,
612 const struct ilo_shader_variant
*variant
)
614 struct ilo_shader
*sh
= NULL
, *tmp
;
616 LIST_FOR_EACH_ENTRY(tmp
, &state
->variants
, list
) {
617 if (memcmp(&tmp
->variant
, variant
, sizeof(*variant
)) == 0) {
627 * Add a shader variant to the shader state.
630 ilo_shader_state_add_variant(struct ilo_shader_state
*state
,
631 const struct ilo_shader_variant
*variant
)
633 struct ilo_shader
*sh
;
635 sh
= ilo_shader_state_search_variant(state
, variant
);
639 ilo_shader_state_gc(state
);
641 switch (state
->info
.type
) {
642 case PIPE_SHADER_VERTEX
:
643 sh
= ilo_shader_compile_vs(state
, variant
);
645 case PIPE_SHADER_FRAGMENT
:
646 sh
= ilo_shader_compile_fs(state
, variant
);
648 case PIPE_SHADER_GEOMETRY
:
649 sh
= ilo_shader_compile_gs(state
, variant
);
651 case PIPE_SHADER_COMPUTE
:
652 sh
= ilo_shader_compile_cs(state
, variant
);
659 assert(!"failed to compile shader");
663 sh
->variant
= *variant
;
665 ilo_shader_state_add_shader(state
, sh
);
671 * Update state->shader to point to a variant. If the variant does not exist,
672 * it will be added first.
675 ilo_shader_state_use_variant(struct ilo_shader_state
*state
,
676 const struct ilo_shader_variant
*variant
)
678 struct ilo_shader
*sh
;
680 sh
= ilo_shader_state_add_variant(state
, variant
);
685 if (state
->variants
.next
!= &sh
->list
) {
687 list_add(&sh
->list
, &state
->variants
);