2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "tgsi/tgsi_parse.h"
29 #include "intel_winsys.h"
31 #include "ilo_shader.h"
34 * Initialize a shader variant.
37 ilo_shader_variant_init(struct ilo_shader_variant
*variant
,
38 const struct ilo_shader_info
*info
,
39 const struct ilo_context
*ilo
)
43 memset(variant
, 0, sizeof(*variant
));
46 case PIPE_SHADER_VERTEX
:
47 variant
->u
.vs
.rasterizer_discard
=
48 ilo
->rasterizer
->state
.rasterizer_discard
;
49 variant
->u
.vs
.num_ucps
=
50 util_last_bit(ilo
->rasterizer
->state
.clip_plane_enable
);
52 case PIPE_SHADER_GEOMETRY
:
53 variant
->u
.gs
.rasterizer_discard
=
54 ilo
->rasterizer
->state
.rasterizer_discard
;
55 variant
->u
.gs
.num_inputs
= ilo
->vs
->shader
->out
.count
;
56 for (i
= 0; i
< ilo
->vs
->shader
->out
.count
; i
++) {
57 variant
->u
.gs
.semantic_names
[i
] =
58 ilo
->vs
->shader
->out
.semantic_names
[i
];
59 variant
->u
.gs
.semantic_indices
[i
] =
60 ilo
->vs
->shader
->out
.semantic_indices
[i
];
63 case PIPE_SHADER_FRAGMENT
:
64 variant
->u
.fs
.flatshade
=
65 (info
->has_color_interp
&& ilo
->rasterizer
->state
.flatshade
);
66 variant
->u
.fs
.fb_height
= (info
->has_pos
) ?
67 ilo
->fb
.state
.height
: 1;
68 variant
->u
.fs
.num_cbufs
= ilo
->fb
.state
.nr_cbufs
;
71 assert(!"unknown shader type");
75 num_views
= ilo
->view
[info
->type
].count
;
76 assert(info
->num_samplers
<= num_views
);
78 variant
->num_sampler_views
= info
->num_samplers
;
79 for (i
= 0; i
< info
->num_samplers
; i
++) {
80 const struct pipe_sampler_view
*view
= ilo
->view
[info
->type
].states
[i
];
81 const struct ilo_sampler_cso
*sampler
= ilo
->sampler
[info
->type
].cso
[i
];
84 variant
->sampler_view_swizzles
[i
].r
= view
->swizzle_r
;
85 variant
->sampler_view_swizzles
[i
].g
= view
->swizzle_g
;
86 variant
->sampler_view_swizzles
[i
].b
= view
->swizzle_b
;
87 variant
->sampler_view_swizzles
[i
].a
= view
->swizzle_a
;
89 else if (info
->shadow_samplers
& (1 << i
)) {
90 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
91 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_RED
;
92 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_RED
;
93 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ONE
;
96 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
97 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_GREEN
;
98 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_BLUE
;
99 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ALPHA
;
103 * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
104 * the HW wrap mode is set to BRW_TEXCOORDMODE_CLAMP_BORDER, and we need
105 * to manually saturate the texture coordinates.
108 variant
->saturate_tex_coords
[0] |= sampler
->saturate_s
<< i
;
109 variant
->saturate_tex_coords
[1] |= sampler
->saturate_t
<< i
;
110 variant
->saturate_tex_coords
[2] |= sampler
->saturate_r
<< i
;
116 * Guess the shader variant, knowing that the context may still change.
119 ilo_shader_variant_guess(struct ilo_shader_variant
*variant
,
120 const struct ilo_shader_info
*info
,
121 const struct ilo_context
*ilo
)
125 memset(variant
, 0, sizeof(*variant
));
127 switch (info
->type
) {
128 case PIPE_SHADER_VERTEX
:
130 case PIPE_SHADER_GEOMETRY
:
132 case PIPE_SHADER_FRAGMENT
:
133 variant
->u
.fs
.flatshade
= false;
134 variant
->u
.fs
.fb_height
= (info
->has_pos
) ?
135 ilo
->fb
.state
.height
: 1;
136 variant
->u
.fs
.num_cbufs
= 1;
139 assert(!"unknown shader type");
143 variant
->num_sampler_views
= info
->num_samplers
;
144 for (i
= 0; i
< info
->num_samplers
; i
++) {
145 if (info
->shadow_samplers
& (1 << i
)) {
146 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
147 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_RED
;
148 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_RED
;
149 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ONE
;
152 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
153 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_GREEN
;
154 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_BLUE
;
155 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ALPHA
;
162 * Parse a TGSI instruction for the shader info.
165 ilo_shader_info_parse_inst(struct ilo_shader_info
*info
,
166 const struct tgsi_full_instruction
*inst
)
170 /* look for edgeflag passthrough */
171 if (info
->edgeflag_out
>= 0 &&
172 inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
173 inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
&&
174 inst
->Dst
[0].Register
.Index
== info
->edgeflag_out
) {
176 assert(inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
);
177 info
->edgeflag_in
= inst
->Src
[0].Register
.Index
;
180 if (inst
->Instruction
.Texture
) {
183 switch (inst
->Texture
.Texture
) {
184 case TGSI_TEXTURE_SHADOW1D
:
185 case TGSI_TEXTURE_SHADOW2D
:
186 case TGSI_TEXTURE_SHADOWRECT
:
187 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
188 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
189 case TGSI_TEXTURE_SHADOWCUBE
:
190 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
198 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
199 const struct tgsi_full_src_register
*src
= &inst
->Src
[i
];
201 if (src
->Register
.File
== TGSI_FILE_SAMPLER
) {
202 const int idx
= src
->Register
.Index
;
204 if (idx
>= info
->num_samplers
)
205 info
->num_samplers
= idx
+ 1;
208 info
->shadow_samplers
|= 1 << idx
;
215 * Parse a TGSI property for the shader info.
218 ilo_shader_info_parse_prop(struct ilo_shader_info
*info
,
219 const struct tgsi_full_property
*prop
)
221 switch (prop
->Property
.PropertyName
) {
222 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
:
223 info
->fs_color0_writes_all_cbufs
= prop
->u
[0].Data
;
231 * Parse a TGSI declaration for the shader info.
234 ilo_shader_info_parse_decl(struct ilo_shader_info
*info
,
235 const struct tgsi_full_declaration
*decl
)
237 switch (decl
->Declaration
.File
) {
238 case TGSI_FILE_INPUT
:
239 if (decl
->Declaration
.Interpolate
&&
240 decl
->Interp
.Interpolate
== TGSI_INTERPOLATE_COLOR
)
241 info
->has_color_interp
= true;
242 if (decl
->Declaration
.Semantic
&&
243 decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
)
244 info
->has_pos
= true;
246 case TGSI_FILE_OUTPUT
:
247 if (decl
->Declaration
.Semantic
&&
248 decl
->Semantic
.Name
== TGSI_SEMANTIC_EDGEFLAG
)
249 info
->edgeflag_out
= decl
->Range
.First
;
251 case TGSI_FILE_SYSTEM_VALUE
:
252 if (decl
->Declaration
.Semantic
&&
253 decl
->Semantic
.Name
== TGSI_SEMANTIC_INSTANCEID
)
254 info
->has_instanceid
= true;
255 if (decl
->Declaration
.Semantic
&&
256 decl
->Semantic
.Name
== TGSI_SEMANTIC_VERTEXID
)
257 info
->has_vertexid
= true;
265 ilo_shader_info_parse_tokens(struct ilo_shader_info
*info
)
267 struct tgsi_parse_context parse
;
269 info
->edgeflag_in
= -1;
270 info
->edgeflag_out
= -1;
272 tgsi_parse_init(&parse
, info
->tokens
);
273 while (!tgsi_parse_end_of_tokens(&parse
)) {
274 const union tgsi_full_token
*token
;
276 tgsi_parse_token(&parse
);
277 token
= &parse
.FullToken
;
279 switch (token
->Token
.Type
) {
280 case TGSI_TOKEN_TYPE_DECLARATION
:
281 ilo_shader_info_parse_decl(info
, &token
->FullDeclaration
);
283 case TGSI_TOKEN_TYPE_INSTRUCTION
:
284 ilo_shader_info_parse_inst(info
, &token
->FullInstruction
);
286 case TGSI_TOKEN_TYPE_PROPERTY
:
287 ilo_shader_info_parse_prop(info
, &token
->FullProperty
);
293 tgsi_parse_free(&parse
);
297 * Create a shader state.
299 struct ilo_shader_state
*
300 ilo_shader_state_create(const struct ilo_context
*ilo
,
301 int type
, const void *templ
)
303 struct ilo_shader_state
*state
;
304 struct ilo_shader_variant variant
;
306 state
= CALLOC_STRUCT(ilo_shader_state
);
310 state
->info
.dev
= ilo
->dev
;
311 state
->info
.type
= type
;
313 if (type
== PIPE_SHADER_COMPUTE
) {
314 const struct pipe_compute_state
*c
=
315 (const struct pipe_compute_state
*) templ
;
317 state
->info
.tokens
= tgsi_dup_tokens(c
->prog
);
318 state
->info
.compute
.req_local_mem
= c
->req_local_mem
;
319 state
->info
.compute
.req_private_mem
= c
->req_private_mem
;
320 state
->info
.compute
.req_input_mem
= c
->req_input_mem
;
323 const struct pipe_shader_state
*s
=
324 (const struct pipe_shader_state
*) templ
;
326 state
->info
.tokens
= tgsi_dup_tokens(s
->tokens
);
327 state
->info
.stream_output
= s
->stream_output
;
330 list_inithead(&state
->variants
);
332 ilo_shader_info_parse_tokens(&state
->info
);
334 /* guess and compile now */
335 ilo_shader_variant_guess(&variant
, &state
->info
, ilo
);
336 if (!ilo_shader_state_use_variant(state
, &variant
)) {
337 ilo_shader_state_destroy(state
);
345 * Destroy a shader state.
348 ilo_shader_state_destroy(struct ilo_shader_state
*state
)
350 struct ilo_shader
*sh
, *next
;
352 LIST_FOR_EACH_ENTRY_SAFE(sh
, next
, &state
->variants
, list
)
353 ilo_shader_destroy(sh
);
355 FREE((struct tgsi_token
*) state
->info
.tokens
);
360 * Add a compiled shader to the shader state.
363 ilo_shader_state_add_shader(struct ilo_shader_state
*state
,
364 struct ilo_shader
*sh
)
366 list_add(&sh
->list
, &state
->variants
);
367 state
->num_variants
++;
368 state
->total_size
+= sh
->kernel_size
;
372 * Remove a compiled shader from the shader state.
375 ilo_shader_state_remove_shader(struct ilo_shader_state
*state
,
376 struct ilo_shader
*sh
)
379 state
->num_variants
--;
380 state
->total_size
-= sh
->kernel_size
;
384 * Garbage collect shader variants in the shader state.
387 ilo_shader_state_gc(struct ilo_shader_state
*state
)
389 /* activate when the variants take up more than 4KiB of space */
390 const int limit
= 4 * 1024;
391 struct ilo_shader
*sh
, *next
;
393 if (state
->total_size
< limit
)
396 /* remove from the tail as the most recently ones are at the head */
397 LIST_FOR_EACH_ENTRY_SAFE_REV(sh
, next
, &state
->variants
, list
) {
398 ilo_shader_state_remove_shader(state
, sh
);
399 ilo_shader_destroy(sh
);
401 if (state
->total_size
<= limit
/ 2)
407 * Search for a shader variant.
409 static struct ilo_shader
*
410 ilo_shader_state_search_variant(struct ilo_shader_state
*state
,
411 const struct ilo_shader_variant
*variant
)
413 struct ilo_shader
*sh
= NULL
, *tmp
;
415 LIST_FOR_EACH_ENTRY(tmp
, &state
->variants
, list
) {
416 if (memcmp(&tmp
->variant
, variant
, sizeof(*variant
)) == 0) {
426 * Add a shader variant to the shader state.
429 ilo_shader_state_add_variant(struct ilo_shader_state
*state
,
430 const struct ilo_shader_variant
*variant
)
432 struct ilo_shader
*sh
;
434 sh
= ilo_shader_state_search_variant(state
, variant
);
438 ilo_shader_state_gc(state
);
440 switch (state
->info
.type
) {
441 case PIPE_SHADER_VERTEX
:
442 sh
= ilo_shader_compile_vs(state
, variant
);
444 case PIPE_SHADER_FRAGMENT
:
445 sh
= ilo_shader_compile_fs(state
, variant
);
447 case PIPE_SHADER_GEOMETRY
:
448 sh
= ilo_shader_compile_gs(state
, variant
);
450 case PIPE_SHADER_COMPUTE
:
451 sh
= ilo_shader_compile_cs(state
, variant
);
458 assert(!"failed to compile shader");
462 sh
->variant
= *variant
;
464 ilo_shader_state_add_shader(state
, sh
);
470 * Update state->shader to point to a variant. If the variant does not exist,
471 * it will be added first.
474 ilo_shader_state_use_variant(struct ilo_shader_state
*state
,
475 const struct ilo_shader_variant
*variant
)
477 struct ilo_shader
*sh
;
479 sh
= ilo_shader_state_add_variant(state
, variant
);
484 if (state
->variants
.next
!= &sh
->list
) {
486 list_add(&sh
->list
, &state
->variants
);
/*
 * Drops the reference to the cache's current bo and allocates a new one via
 * intel_winsys_alloc_buffer(), passing shc->winsys, the name "shader cache"
 * and shc->size.
 *
 * NOTE(review): this listing is a lossy extraction.  The return-type line,
 * the braces, the line before the unreference call (original line 500) and
 * the statements after the allocation (original lines 505-509) are not
 * visible here; restore them from the upstream file before building.
 */
495 * Reset the shader cache.
498 ilo_shader_cache_reset(struct ilo_shader_cache
*shc
)
501 intel_bo_unreference(shc
->bo
);
503 shc
->bo
= intel_winsys_alloc_buffer(shc
->winsys
,
504 "shader cache", shc
->size
, 0);
/*
 * Allocates a zeroed ilo_shader_cache, stores the winsys in it, and calls
 * ilo_shader_cache_reset() on it.
 *
 * NOTE(review): this listing is a lossy extraction.  The braces, the lines
 * after CALLOC_STRUCT() (original lines 524-525), the statement under the
 * "initial cache size" comment (original line 529) and the return statement
 * are not visible here; the initial size value cannot be determined from
 * this view.  Restore them from the upstream file before building.
 */
513 * Create a shader cache. A shader cache is a bo holding all compiled shaders.
514 * When the bo is full, a larger bo is allocated and all cached shaders are
515 * invalidated. This is how outdated shaders get dropped. Active shaders
516 * will be added to the new bo when used.
518 struct ilo_shader_cache
*
519 ilo_shader_cache_create(struct intel_winsys
*winsys
)
521 struct ilo_shader_cache
*shc
;
523 shc
= CALLOC_STRUCT(ilo_shader_cache
);
527 shc
->winsys
= winsys
;
528 /* initial cache size */
531 ilo_shader_cache_reset(shc
);
/*
 * Drops the reference to the cache's bo.
 *
 * NOTE(review): this listing is a lossy extraction.  The return-type line,
 * the braces, and original lines 542 and 545 are not visible here
 * (presumably the latter frees shc itself - confirm); restore them from the
 * upstream file before building.
 */
537 * Destroy a shader cache.
540 ilo_shader_cache_destroy(struct ilo_shader_cache
*shc
)
543 intel_bo_unreference(shc
->bo
);
549 * Add shaders to the cache. This may invalidate all other shaders in the
553 ilo_shader_cache_set(struct ilo_shader_cache
*shc
,
554 struct ilo_shader
**shaders
,
559 /* calculate the space needed */
561 for (i
= 0; i
< num_shaders
; i
++) {
562 if (shaders
[i
]->cache_seqno
!= shc
->seqno
)
563 new_cur
= align(new_cur
, 64) + shaders
[i
]->kernel_size
;
566 /* all shaders are already in the cache */
567 if (new_cur
== shc
->cur
)
571 * From the Sandy Bridge PRM, volume 4 part 2, page 112:
573 * "Due to prefetch of the instruction stream, the EUs may attempt to
574 * access up to 8 instructions (128 bytes) beyond the end of the kernel
575 * program - possibly into the next memory page. Although these
576 * instructions will not be executed, software must account for the
577 * prefetch in order to avoid invalid page access faults."
582 * we should be able to append data without being blocked even the bo
586 /* reallocate when the cache is full or busy */
587 if (new_cur
> shc
->size
|| shc
->busy
) {
588 while (new_cur
> shc
->size
)
591 ilo_shader_cache_reset(shc
);
595 for (i
= 0; i
< num_shaders
; i
++) {
596 if (shaders
[i
]->cache_seqno
!= shc
->seqno
) {
597 /* kernels must be aligned to 64-byte */
598 shc
->cur
= align(shc
->cur
, 64);
599 intel_bo_pwrite(shc
->bo
, shc
->cur
,
600 shaders
[i
]->kernel_size
, shaders
[i
]->kernel
);
602 shaders
[i
]->cache_seqno
= shc
->seqno
;
603 shaders
[i
]->cache_offset
= shc
->cur
;
605 shc
->cur
+= shaders
[i
]->kernel_size
;