2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "tgsi/tgsi_parse.h"
29 #include "intel_winsys.h"
31 #include "ilo_shader.h"
34 * Initialize a shader variant.
37 ilo_shader_variant_init(struct ilo_shader_variant
*variant
,
38 const struct ilo_shader_info
*info
,
39 const struct ilo_context
*ilo
)
43 memset(variant
, 0, sizeof(*variant
));
46 case PIPE_SHADER_VERTEX
:
47 variant
->u
.vs
.rasterizer_discard
=
48 ilo
->rasterizer
->state
.rasterizer_discard
;
49 variant
->u
.vs
.num_ucps
=
50 util_last_bit(ilo
->rasterizer
->state
.clip_plane_enable
);
52 case PIPE_SHADER_GEOMETRY
:
53 variant
->u
.gs
.rasterizer_discard
=
54 ilo
->rasterizer
->state
.rasterizer_discard
;
55 variant
->u
.gs
.num_inputs
= ilo
->vs
->shader
->out
.count
;
56 for (i
= 0; i
< ilo
->vs
->shader
->out
.count
; i
++) {
57 variant
->u
.gs
.semantic_names
[i
] =
58 ilo
->vs
->shader
->out
.semantic_names
[i
];
59 variant
->u
.gs
.semantic_indices
[i
] =
60 ilo
->vs
->shader
->out
.semantic_indices
[i
];
63 case PIPE_SHADER_FRAGMENT
:
64 variant
->u
.fs
.flatshade
=
65 (info
->has_color_interp
&& ilo
->rasterizer
->state
.flatshade
);
66 variant
->u
.fs
.fb_height
= (info
->has_pos
) ?
67 ilo
->fb
.state
.height
: 1;
68 variant
->u
.fs
.num_cbufs
= ilo
->fb
.state
.nr_cbufs
;
71 assert(!"unknown shader type");
75 num_views
= ilo
->view
[info
->type
].count
;
76 assert(info
->num_samplers
<= num_views
);
78 variant
->num_sampler_views
= info
->num_samplers
;
79 for (i
= 0; i
< info
->num_samplers
; i
++) {
80 const struct pipe_sampler_view
*view
=
81 ilo
->view
[info
->type
].states
[i
];
82 const struct pipe_sampler_state
*sampler
=
83 ilo
->sampler
[info
->type
].states
[i
];
86 variant
->sampler_view_swizzles
[i
].r
= view
->swizzle_r
;
87 variant
->sampler_view_swizzles
[i
].g
= view
->swizzle_g
;
88 variant
->sampler_view_swizzles
[i
].b
= view
->swizzle_b
;
89 variant
->sampler_view_swizzles
[i
].a
= view
->swizzle_a
;
91 else if (info
->shadow_samplers
& (1 << i
)) {
92 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
93 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_RED
;
94 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_RED
;
95 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ONE
;
98 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
99 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_GREEN
;
100 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_BLUE
;
101 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ALPHA
;
105 * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
106 * the HW wrap mode is set to BRW_TEXCOORDMODE_CLAMP_BORDER, and we need
107 * to manually saturate the texture coordinates.
109 if (sampler
&& sampler
->min_img_filter
!= PIPE_TEX_FILTER_NEAREST
) {
110 if (sampler
->wrap_s
== PIPE_TEX_WRAP_CLAMP
)
111 variant
->saturate_tex_coords
[0] |= 1 << i
;
112 if (sampler
->wrap_t
== PIPE_TEX_WRAP_CLAMP
)
113 variant
->saturate_tex_coords
[1] |= 1 << i
;
114 if (sampler
->wrap_r
== PIPE_TEX_WRAP_CLAMP
)
115 variant
->saturate_tex_coords
[2] |= 1 << i
;
121 * Guess the shader variant, knowing that the context may still change.
124 ilo_shader_variant_guess(struct ilo_shader_variant
*variant
,
125 const struct ilo_shader_info
*info
,
126 const struct ilo_context
*ilo
)
130 memset(variant
, 0, sizeof(*variant
));
132 switch (info
->type
) {
133 case PIPE_SHADER_VERTEX
:
135 case PIPE_SHADER_GEOMETRY
:
137 case PIPE_SHADER_FRAGMENT
:
138 variant
->u
.fs
.flatshade
= false;
139 variant
->u
.fs
.fb_height
= (info
->has_pos
) ?
140 ilo
->fb
.state
.height
: 1;
141 variant
->u
.fs
.num_cbufs
= 1;
144 assert(!"unknown shader type");
148 variant
->num_sampler_views
= info
->num_samplers
;
149 for (i
= 0; i
< info
->num_samplers
; i
++) {
150 if (info
->shadow_samplers
& (1 << i
)) {
151 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
152 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_RED
;
153 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_RED
;
154 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ONE
;
157 variant
->sampler_view_swizzles
[i
].r
= PIPE_SWIZZLE_RED
;
158 variant
->sampler_view_swizzles
[i
].g
= PIPE_SWIZZLE_GREEN
;
159 variant
->sampler_view_swizzles
[i
].b
= PIPE_SWIZZLE_BLUE
;
160 variant
->sampler_view_swizzles
[i
].a
= PIPE_SWIZZLE_ALPHA
;
167 * Parse a TGSI instruction for the shader info.
170 ilo_shader_info_parse_inst(struct ilo_shader_info
*info
,
171 const struct tgsi_full_instruction
*inst
)
175 /* look for edgeflag passthrough */
176 if (info
->edgeflag_out
>= 0 &&
177 inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
178 inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
&&
179 inst
->Dst
[0].Register
.Index
== info
->edgeflag_out
) {
181 assert(inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
);
182 info
->edgeflag_in
= inst
->Src
[0].Register
.Index
;
185 if (inst
->Instruction
.Texture
) {
188 switch (inst
->Texture
.Texture
) {
189 case TGSI_TEXTURE_SHADOW1D
:
190 case TGSI_TEXTURE_SHADOW2D
:
191 case TGSI_TEXTURE_SHADOWRECT
:
192 case TGSI_TEXTURE_SHADOW1D_ARRAY
:
193 case TGSI_TEXTURE_SHADOW2D_ARRAY
:
194 case TGSI_TEXTURE_SHADOWCUBE
:
195 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
203 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
204 const struct tgsi_full_src_register
*src
= &inst
->Src
[i
];
206 if (src
->Register
.File
== TGSI_FILE_SAMPLER
) {
207 const int idx
= src
->Register
.Index
;
209 if (idx
>= info
->num_samplers
)
210 info
->num_samplers
= idx
+ 1;
213 info
->shadow_samplers
|= 1 << idx
;
220 * Parse a TGSI property for the shader info.
223 ilo_shader_info_parse_prop(struct ilo_shader_info
*info
,
224 const struct tgsi_full_property
*prop
)
226 switch (prop
->Property
.PropertyName
) {
227 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
:
228 info
->fs_color0_writes_all_cbufs
= prop
->u
[0].Data
;
236 * Parse a TGSI declaration for the shader info.
239 ilo_shader_info_parse_decl(struct ilo_shader_info
*info
,
240 const struct tgsi_full_declaration
*decl
)
242 switch (decl
->Declaration
.File
) {
243 case TGSI_FILE_INPUT
:
244 if (decl
->Declaration
.Interpolate
&&
245 decl
->Interp
.Interpolate
== TGSI_INTERPOLATE_COLOR
)
246 info
->has_color_interp
= true;
247 if (decl
->Declaration
.Semantic
&&
248 decl
->Semantic
.Name
== TGSI_SEMANTIC_POSITION
)
249 info
->has_pos
= true;
251 case TGSI_FILE_OUTPUT
:
252 if (decl
->Declaration
.Semantic
&&
253 decl
->Semantic
.Name
== TGSI_SEMANTIC_EDGEFLAG
)
254 info
->edgeflag_out
= decl
->Range
.First
;
256 case TGSI_FILE_SYSTEM_VALUE
:
257 if (decl
->Declaration
.Semantic
&&
258 decl
->Semantic
.Name
== TGSI_SEMANTIC_INSTANCEID
)
259 info
->has_instanceid
= true;
260 if (decl
->Declaration
.Semantic
&&
261 decl
->Semantic
.Name
== TGSI_SEMANTIC_VERTEXID
)
262 info
->has_vertexid
= true;
270 ilo_shader_info_parse_tokens(struct ilo_shader_info
*info
)
272 struct tgsi_parse_context parse
;
274 info
->edgeflag_in
= -1;
275 info
->edgeflag_out
= -1;
277 tgsi_parse_init(&parse
, info
->tokens
);
278 while (!tgsi_parse_end_of_tokens(&parse
)) {
279 const union tgsi_full_token
*token
;
281 tgsi_parse_token(&parse
);
282 token
= &parse
.FullToken
;
284 switch (token
->Token
.Type
) {
285 case TGSI_TOKEN_TYPE_DECLARATION
:
286 ilo_shader_info_parse_decl(info
, &token
->FullDeclaration
);
288 case TGSI_TOKEN_TYPE_INSTRUCTION
:
289 ilo_shader_info_parse_inst(info
, &token
->FullInstruction
);
291 case TGSI_TOKEN_TYPE_PROPERTY
:
292 ilo_shader_info_parse_prop(info
, &token
->FullProperty
);
298 tgsi_parse_free(&parse
);
302 * Create a shader state.
304 struct ilo_shader_state
*
305 ilo_shader_state_create(const struct ilo_context
*ilo
,
306 int type
, const void *templ
)
308 struct ilo_shader_state
*state
;
309 struct ilo_shader_variant variant
;
311 state
= CALLOC_STRUCT(ilo_shader_state
);
315 state
->info
.dev
= ilo
->dev
;
316 state
->info
.type
= type
;
318 if (type
== PIPE_SHADER_COMPUTE
) {
319 const struct pipe_compute_state
*c
=
320 (const struct pipe_compute_state
*) templ
;
322 state
->info
.tokens
= tgsi_dup_tokens(c
->prog
);
323 state
->info
.compute
.req_local_mem
= c
->req_local_mem
;
324 state
->info
.compute
.req_private_mem
= c
->req_private_mem
;
325 state
->info
.compute
.req_input_mem
= c
->req_input_mem
;
328 const struct pipe_shader_state
*s
=
329 (const struct pipe_shader_state
*) templ
;
331 state
->info
.tokens
= tgsi_dup_tokens(s
->tokens
);
332 state
->info
.stream_output
= s
->stream_output
;
335 list_inithead(&state
->variants
);
337 ilo_shader_info_parse_tokens(&state
->info
);
339 /* guess and compile now */
340 ilo_shader_variant_guess(&variant
, &state
->info
, ilo
);
341 if (!ilo_shader_state_use_variant(state
, &variant
)) {
342 ilo_shader_state_destroy(state
);
350 * Destroy a shader state.
353 ilo_shader_state_destroy(struct ilo_shader_state
*state
)
355 struct ilo_shader
*sh
, *next
;
357 LIST_FOR_EACH_ENTRY_SAFE(sh
, next
, &state
->variants
, list
)
358 ilo_shader_destroy(sh
);
360 FREE((struct tgsi_token
*) state
->info
.tokens
);
365 * Add a compiled shader to the shader state.
368 ilo_shader_state_add_shader(struct ilo_shader_state
*state
,
369 struct ilo_shader
*sh
)
371 list_add(&sh
->list
, &state
->variants
);
372 state
->num_variants
++;
373 state
->total_size
+= sh
->kernel_size
;
377 * Remove a compiled shader from the shader state.
380 ilo_shader_state_remove_shader(struct ilo_shader_state
*state
,
381 struct ilo_shader
*sh
)
384 state
->num_variants
--;
385 state
->total_size
-= sh
->kernel_size
;
389 * Garbage collect shader variants in the shader state.
392 ilo_shader_state_gc(struct ilo_shader_state
*state
)
394 /* activate when the variants take up more than 4KiB of space */
395 const int limit
= 4 * 1024;
396 struct ilo_shader
*sh
, *next
;
398 if (state
->total_size
< limit
)
401 /* remove from the tail as the most recently ones are at the head */
402 LIST_FOR_EACH_ENTRY_SAFE_REV(sh
, next
, &state
->variants
, list
) {
403 ilo_shader_state_remove_shader(state
, sh
);
404 ilo_shader_destroy(sh
);
406 if (state
->total_size
<= limit
/ 2)
412 * Search for a shader variant.
414 static struct ilo_shader
*
415 ilo_shader_state_search_variant(struct ilo_shader_state
*state
,
416 const struct ilo_shader_variant
*variant
)
418 struct ilo_shader
*sh
= NULL
, *tmp
;
420 LIST_FOR_EACH_ENTRY(tmp
, &state
->variants
, list
) {
421 if (memcmp(&tmp
->variant
, variant
, sizeof(*variant
)) == 0) {
431 * Add a shader variant to the shader state.
434 ilo_shader_state_add_variant(struct ilo_shader_state
*state
,
435 const struct ilo_shader_variant
*variant
)
437 struct ilo_shader
*sh
;
439 sh
= ilo_shader_state_search_variant(state
, variant
);
443 ilo_shader_state_gc(state
);
445 switch (state
->info
.type
) {
446 case PIPE_SHADER_VERTEX
:
447 sh
= ilo_shader_compile_vs(state
, variant
);
449 case PIPE_SHADER_FRAGMENT
:
450 sh
= ilo_shader_compile_fs(state
, variant
);
452 case PIPE_SHADER_GEOMETRY
:
453 sh
= ilo_shader_compile_gs(state
, variant
);
455 case PIPE_SHADER_COMPUTE
:
456 sh
= ilo_shader_compile_cs(state
, variant
);
463 assert(!"failed to compile shader");
467 sh
->variant
= *variant
;
469 ilo_shader_state_add_shader(state
, sh
);
475 * Update state->shader to point to a variant. If the variant does not exist,
476 * it will be added first.
479 ilo_shader_state_use_variant(struct ilo_shader_state
*state
,
480 const struct ilo_shader_variant
*variant
)
482 struct ilo_shader
*sh
;
484 sh
= ilo_shader_state_add_variant(state
, variant
);
489 if (state
->variants
.next
!= &sh
->list
) {
491 list_add(&sh
->list
, &state
->variants
);
500 * Reset the shader cache.
503 ilo_shader_cache_reset(struct ilo_shader_cache
*shc
)
506 shc
->bo
->unreference(shc
->bo
);
508 shc
->bo
= shc
->winsys
->alloc_buffer(shc
->winsys
,
509 "shader cache", shc
->size
, 0);
518 * Create a shader cache. A shader cache is a bo holding all compiled shaders.
519 * When the bo is full, a larger bo is allocated and all cached shaders are
520 * invalidated. This is how outdated shaders get dropped. Active shaders
521 * will be added to the new bo when used.
523 struct ilo_shader_cache
*
524 ilo_shader_cache_create(struct intel_winsys
*winsys
)
526 struct ilo_shader_cache
*shc
;
528 shc
= CALLOC_STRUCT(ilo_shader_cache
);
532 shc
->winsys
= winsys
;
533 /* initial cache size */
536 ilo_shader_cache_reset(shc
);
542 * Destroy a shader cache.
545 ilo_shader_cache_destroy(struct ilo_shader_cache
*shc
)
548 shc
->bo
->unreference(shc
->bo
);
554 * Add shaders to the cache. This may invalidate all other shaders in the
558 ilo_shader_cache_set(struct ilo_shader_cache
*shc
,
559 struct ilo_shader
**shaders
,
564 /* calculate the space needed */
566 for (i
= 0; i
< num_shaders
; i
++) {
567 if (shaders
[i
]->cache_seqno
!= shc
->seqno
)
568 new_cur
= align(new_cur
, 64) + shaders
[i
]->kernel_size
;
571 /* all shaders are already in the cache */
572 if (new_cur
== shc
->cur
)
576 * From the Sandy Bridge PRM, volume 4 part 2, page 112:
578 * "Due to prefetch of the instruction stream, the EUs may attempt to
579 * access up to 8 instructions (128 bytes) beyond the end of the kernel
580 * program - possibly into the next memory page. Although these
581 * instructions will not be executed, software must account for the
582 * prefetch in order to avoid invalid page access faults."
587 * we should be able to append data without being blocked even the bo
591 /* reallocate when the cache is full or busy */
592 if (new_cur
> shc
->size
|| shc
->busy
) {
593 while (new_cur
> shc
->size
)
596 ilo_shader_cache_reset(shc
);
600 for (i
= 0; i
< num_shaders
; i
++) {
601 if (shaders
[i
]->cache_seqno
!= shc
->seqno
) {
602 /* kernels must be aligned to 64-byte */
603 shc
->cur
= align(shc
->cur
, 64);
604 shc
->bo
->pwrite(shc
->bo
, shc
->cur
,
605 shaders
[i
]->kernel_size
, shaders
[i
]->kernel
);
607 shaders
[i
]->cache_seqno
= shc
->seqno
;
608 shaders
[i
]->cache_offset
= shc
->cur
;
610 shc
->cur
+= shaders
[i
]->kernel_size
;