ilo: introduce sampler CSO
[mesa.git] / src / gallium / drivers / ilo / ilo_shader.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_parse.h"
29 #include "intel_winsys.h"
30
31 #include "ilo_shader.h"
32
33 /**
34 * Initialize a shader variant.
35 */
36 void
37 ilo_shader_variant_init(struct ilo_shader_variant *variant,
38 const struct ilo_shader_info *info,
39 const struct ilo_context *ilo)
40 {
41 int num_views, i;
42
43 memset(variant, 0, sizeof(*variant));
44
45 switch (info->type) {
46 case PIPE_SHADER_VERTEX:
47 variant->u.vs.rasterizer_discard =
48 ilo->rasterizer->state.rasterizer_discard;
49 variant->u.vs.num_ucps =
50 util_last_bit(ilo->rasterizer->state.clip_plane_enable);
51 break;
52 case PIPE_SHADER_GEOMETRY:
53 variant->u.gs.rasterizer_discard =
54 ilo->rasterizer->state.rasterizer_discard;
55 variant->u.gs.num_inputs = ilo->vs->shader->out.count;
56 for (i = 0; i < ilo->vs->shader->out.count; i++) {
57 variant->u.gs.semantic_names[i] =
58 ilo->vs->shader->out.semantic_names[i];
59 variant->u.gs.semantic_indices[i] =
60 ilo->vs->shader->out.semantic_indices[i];
61 }
62 break;
63 case PIPE_SHADER_FRAGMENT:
64 variant->u.fs.flatshade =
65 (info->has_color_interp && ilo->rasterizer->state.flatshade);
66 variant->u.fs.fb_height = (info->has_pos) ?
67 ilo->fb.state.height : 1;
68 variant->u.fs.num_cbufs = ilo->fb.state.nr_cbufs;
69 break;
70 default:
71 assert(!"unknown shader type");
72 break;
73 }
74
75 num_views = ilo->view[info->type].count;
76 assert(info->num_samplers <= num_views);
77
78 variant->num_sampler_views = info->num_samplers;
79 for (i = 0; i < info->num_samplers; i++) {
80 const struct pipe_sampler_view *view =
81 ilo->view[info->type].states[i];
82 const struct ilo_sampler_cso *sampler =
83 ilo->sampler[info->type].cso[i];
84
85 if (view) {
86 variant->sampler_view_swizzles[i].r = view->swizzle_r;
87 variant->sampler_view_swizzles[i].g = view->swizzle_g;
88 variant->sampler_view_swizzles[i].b = view->swizzle_b;
89 variant->sampler_view_swizzles[i].a = view->swizzle_a;
90 }
91 else if (info->shadow_samplers & (1 << i)) {
92 variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
93 variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED;
94 variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED;
95 variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE;
96 }
97 else {
98 variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
99 variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN;
100 variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE;
101 variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA;
102 }
103
104 /*
105 * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
106 * the HW wrap mode is set to BRW_TEXCOORDMODE_CLAMP_BORDER, and we need
107 * to manually saturate the texture coordinates.
108 */
109 if (sampler) {
110 variant->saturate_tex_coords[0] |= sampler->saturate_s << i;
111 variant->saturate_tex_coords[1] |= sampler->saturate_t << i;
112 variant->saturate_tex_coords[2] |= sampler->saturate_r << i;
113 }
114 }
115 }
116
117 /**
118 * Guess the shader variant, knowing that the context may still change.
119 */
120 static void
121 ilo_shader_variant_guess(struct ilo_shader_variant *variant,
122 const struct ilo_shader_info *info,
123 const struct ilo_context *ilo)
124 {
125 int i;
126
127 memset(variant, 0, sizeof(*variant));
128
129 switch (info->type) {
130 case PIPE_SHADER_VERTEX:
131 break;
132 case PIPE_SHADER_GEOMETRY:
133 break;
134 case PIPE_SHADER_FRAGMENT:
135 variant->u.fs.flatshade = false;
136 variant->u.fs.fb_height = (info->has_pos) ?
137 ilo->fb.state.height : 1;
138 variant->u.fs.num_cbufs = 1;
139 break;
140 default:
141 assert(!"unknown shader type");
142 break;
143 }
144
145 variant->num_sampler_views = info->num_samplers;
146 for (i = 0; i < info->num_samplers; i++) {
147 if (info->shadow_samplers & (1 << i)) {
148 variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
149 variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED;
150 variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED;
151 variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE;
152 }
153 else {
154 variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
155 variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN;
156 variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE;
157 variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA;
158 }
159 }
160 }
161
162
163 /**
164 * Parse a TGSI instruction for the shader info.
165 */
166 static void
167 ilo_shader_info_parse_inst(struct ilo_shader_info *info,
168 const struct tgsi_full_instruction *inst)
169 {
170 int i;
171
172 /* look for edgeflag passthrough */
173 if (info->edgeflag_out >= 0 &&
174 inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
175 inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
176 inst->Dst[0].Register.Index == info->edgeflag_out) {
177
178 assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
179 info->edgeflag_in = inst->Src[0].Register.Index;
180 }
181
182 if (inst->Instruction.Texture) {
183 bool shadow;
184
185 switch (inst->Texture.Texture) {
186 case TGSI_TEXTURE_SHADOW1D:
187 case TGSI_TEXTURE_SHADOW2D:
188 case TGSI_TEXTURE_SHADOWRECT:
189 case TGSI_TEXTURE_SHADOW1D_ARRAY:
190 case TGSI_TEXTURE_SHADOW2D_ARRAY:
191 case TGSI_TEXTURE_SHADOWCUBE:
192 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
193 shadow = true;
194 break;
195 default:
196 shadow = false;
197 break;
198 }
199
200 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
201 const struct tgsi_full_src_register *src = &inst->Src[i];
202
203 if (src->Register.File == TGSI_FILE_SAMPLER) {
204 const int idx = src->Register.Index;
205
206 if (idx >= info->num_samplers)
207 info->num_samplers = idx + 1;
208
209 if (shadow)
210 info->shadow_samplers |= 1 << idx;
211 }
212 }
213 }
214 }
215
216 /**
217 * Parse a TGSI property for the shader info.
218 */
219 static void
220 ilo_shader_info_parse_prop(struct ilo_shader_info *info,
221 const struct tgsi_full_property *prop)
222 {
223 switch (prop->Property.PropertyName) {
224 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
225 info->fs_color0_writes_all_cbufs = prop->u[0].Data;
226 break;
227 default:
228 break;
229 }
230 }
231
232 /**
233 * Parse a TGSI declaration for the shader info.
234 */
235 static void
236 ilo_shader_info_parse_decl(struct ilo_shader_info *info,
237 const struct tgsi_full_declaration *decl)
238 {
239 switch (decl->Declaration.File) {
240 case TGSI_FILE_INPUT:
241 if (decl->Declaration.Interpolate &&
242 decl->Interp.Interpolate == TGSI_INTERPOLATE_COLOR)
243 info->has_color_interp = true;
244 if (decl->Declaration.Semantic &&
245 decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
246 info->has_pos = true;
247 break;
248 case TGSI_FILE_OUTPUT:
249 if (decl->Declaration.Semantic &&
250 decl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG)
251 info->edgeflag_out = decl->Range.First;
252 break;
253 case TGSI_FILE_SYSTEM_VALUE:
254 if (decl->Declaration.Semantic &&
255 decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID)
256 info->has_instanceid = true;
257 if (decl->Declaration.Semantic &&
258 decl->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
259 info->has_vertexid = true;
260 break;
261 default:
262 break;
263 }
264 }
265
266 static void
267 ilo_shader_info_parse_tokens(struct ilo_shader_info *info)
268 {
269 struct tgsi_parse_context parse;
270
271 info->edgeflag_in = -1;
272 info->edgeflag_out = -1;
273
274 tgsi_parse_init(&parse, info->tokens);
275 while (!tgsi_parse_end_of_tokens(&parse)) {
276 const union tgsi_full_token *token;
277
278 tgsi_parse_token(&parse);
279 token = &parse.FullToken;
280
281 switch (token->Token.Type) {
282 case TGSI_TOKEN_TYPE_DECLARATION:
283 ilo_shader_info_parse_decl(info, &token->FullDeclaration);
284 break;
285 case TGSI_TOKEN_TYPE_INSTRUCTION:
286 ilo_shader_info_parse_inst(info, &token->FullInstruction);
287 break;
288 case TGSI_TOKEN_TYPE_PROPERTY:
289 ilo_shader_info_parse_prop(info, &token->FullProperty);
290 break;
291 default:
292 break;
293 }
294 }
295 tgsi_parse_free(&parse);
296 }
297
298 /**
299 * Create a shader state.
300 */
301 struct ilo_shader_state *
302 ilo_shader_state_create(const struct ilo_context *ilo,
303 int type, const void *templ)
304 {
305 struct ilo_shader_state *state;
306 struct ilo_shader_variant variant;
307
308 state = CALLOC_STRUCT(ilo_shader_state);
309 if (!state)
310 return NULL;
311
312 state->info.dev = ilo->dev;
313 state->info.type = type;
314
315 if (type == PIPE_SHADER_COMPUTE) {
316 const struct pipe_compute_state *c =
317 (const struct pipe_compute_state *) templ;
318
319 state->info.tokens = tgsi_dup_tokens(c->prog);
320 state->info.compute.req_local_mem = c->req_local_mem;
321 state->info.compute.req_private_mem = c->req_private_mem;
322 state->info.compute.req_input_mem = c->req_input_mem;
323 }
324 else {
325 const struct pipe_shader_state *s =
326 (const struct pipe_shader_state *) templ;
327
328 state->info.tokens = tgsi_dup_tokens(s->tokens);
329 state->info.stream_output = s->stream_output;
330 }
331
332 list_inithead(&state->variants);
333
334 ilo_shader_info_parse_tokens(&state->info);
335
336 /* guess and compile now */
337 ilo_shader_variant_guess(&variant, &state->info, ilo);
338 if (!ilo_shader_state_use_variant(state, &variant)) {
339 ilo_shader_state_destroy(state);
340 return NULL;
341 }
342
343 return state;
344 }
345
346 /**
347 * Destroy a shader state.
348 */
349 void
350 ilo_shader_state_destroy(struct ilo_shader_state *state)
351 {
352 struct ilo_shader *sh, *next;
353
354 LIST_FOR_EACH_ENTRY_SAFE(sh, next, &state->variants, list)
355 ilo_shader_destroy(sh);
356
357 FREE((struct tgsi_token *) state->info.tokens);
358 FREE(state);
359 }
360
361 /**
362 * Add a compiled shader to the shader state.
363 */
364 static void
365 ilo_shader_state_add_shader(struct ilo_shader_state *state,
366 struct ilo_shader *sh)
367 {
368 list_add(&sh->list, &state->variants);
369 state->num_variants++;
370 state->total_size += sh->kernel_size;
371 }
372
373 /**
374 * Remove a compiled shader from the shader state.
375 */
376 static void
377 ilo_shader_state_remove_shader(struct ilo_shader_state *state,
378 struct ilo_shader *sh)
379 {
380 list_del(&sh->list);
381 state->num_variants--;
382 state->total_size -= sh->kernel_size;
383 }
384
385 /**
386 * Garbage collect shader variants in the shader state.
387 */
388 static void
389 ilo_shader_state_gc(struct ilo_shader_state *state)
390 {
391 /* activate when the variants take up more than 4KiB of space */
392 const int limit = 4 * 1024;
393 struct ilo_shader *sh, *next;
394
395 if (state->total_size < limit)
396 return;
397
398 /* remove from the tail as the most recently ones are at the head */
399 LIST_FOR_EACH_ENTRY_SAFE_REV(sh, next, &state->variants, list) {
400 ilo_shader_state_remove_shader(state, sh);
401 ilo_shader_destroy(sh);
402
403 if (state->total_size <= limit / 2)
404 break;
405 }
406 }
407
408 /**
409 * Search for a shader variant.
410 */
411 static struct ilo_shader *
412 ilo_shader_state_search_variant(struct ilo_shader_state *state,
413 const struct ilo_shader_variant *variant)
414 {
415 struct ilo_shader *sh = NULL, *tmp;
416
417 LIST_FOR_EACH_ENTRY(tmp, &state->variants, list) {
418 if (memcmp(&tmp->variant, variant, sizeof(*variant)) == 0) {
419 sh = tmp;
420 break;
421 }
422 }
423
424 return sh;
425 }
426
427 /**
428 * Add a shader variant to the shader state.
429 */
430 struct ilo_shader *
431 ilo_shader_state_add_variant(struct ilo_shader_state *state,
432 const struct ilo_shader_variant *variant)
433 {
434 struct ilo_shader *sh;
435
436 sh = ilo_shader_state_search_variant(state, variant);
437 if (sh)
438 return sh;
439
440 ilo_shader_state_gc(state);
441
442 switch (state->info.type) {
443 case PIPE_SHADER_VERTEX:
444 sh = ilo_shader_compile_vs(state, variant);
445 break;
446 case PIPE_SHADER_FRAGMENT:
447 sh = ilo_shader_compile_fs(state, variant);
448 break;
449 case PIPE_SHADER_GEOMETRY:
450 sh = ilo_shader_compile_gs(state, variant);
451 break;
452 case PIPE_SHADER_COMPUTE:
453 sh = ilo_shader_compile_cs(state, variant);
454 break;
455 default:
456 sh = NULL;
457 break;
458 }
459 if (!sh) {
460 assert(!"failed to compile shader");
461 return NULL;
462 }
463
464 sh->variant = *variant;
465
466 ilo_shader_state_add_shader(state, sh);
467
468 return sh;
469 }
470
471 /**
472 * Update state->shader to point to a variant. If the variant does not exist,
473 * it will be added first.
474 */
475 bool
476 ilo_shader_state_use_variant(struct ilo_shader_state *state,
477 const struct ilo_shader_variant *variant)
478 {
479 struct ilo_shader *sh;
480
481 sh = ilo_shader_state_add_variant(state, variant);
482 if (!sh)
483 return false;
484
485 /* move to head */
486 if (state->variants.next != &sh->list) {
487 list_del(&sh->list);
488 list_add(&sh->list, &state->variants);
489 }
490
491 state->shader = sh;
492
493 return true;
494 }
495
496 /**
497 * Reset the shader cache.
498 */
499 static void
500 ilo_shader_cache_reset(struct ilo_shader_cache *shc)
501 {
502 if (shc->bo)
503 shc->bo->unreference(shc->bo);
504
505 shc->bo = shc->winsys->alloc_buffer(shc->winsys,
506 "shader cache", shc->size, 0);
507 shc->busy = false;
508 shc->cur = 0;
509 shc->seqno++;
510 if (!shc->seqno)
511 shc->seqno = 1;
512 }
513
514 /**
515 * Create a shader cache. A shader cache is a bo holding all compiled shaders.
516 * When the bo is full, a larger bo is allocated and all cached shaders are
517 * invalidated. This is how outdated shaders get dropped. Active shaders
518 * will be added to the new bo when used.
519 */
520 struct ilo_shader_cache *
521 ilo_shader_cache_create(struct intel_winsys *winsys)
522 {
523 struct ilo_shader_cache *shc;
524
525 shc = CALLOC_STRUCT(ilo_shader_cache);
526 if (!shc)
527 return NULL;
528
529 shc->winsys = winsys;
530 /* initial cache size */
531 shc->size = 4096;
532
533 ilo_shader_cache_reset(shc);
534
535 return shc;
536 }
537
538 /**
539 * Destroy a shader cache.
540 */
541 void
542 ilo_shader_cache_destroy(struct ilo_shader_cache *shc)
543 {
544 if (shc->bo)
545 shc->bo->unreference(shc->bo);
546
547 FREE(shc);
548 }
549
550 /**
551 * Add shaders to the cache. This may invalidate all other shaders in the
552 * cache.
553 */
554 void
555 ilo_shader_cache_set(struct ilo_shader_cache *shc,
556 struct ilo_shader **shaders,
557 int num_shaders)
558 {
559 int new_cur, i;
560
561 /* calculate the space needed */
562 new_cur = shc->cur;
563 for (i = 0; i < num_shaders; i++) {
564 if (shaders[i]->cache_seqno != shc->seqno)
565 new_cur = align(new_cur, 64) + shaders[i]->kernel_size;
566 }
567
568 /* all shaders are already in the cache */
569 if (new_cur == shc->cur)
570 return;
571
572 /*
573 * From the Sandy Bridge PRM, volume 4 part 2, page 112:
574 *
575 * "Due to prefetch of the instruction stream, the EUs may attempt to
576 * access up to 8 instructions (128 bytes) beyond the end of the kernel
577 * program - possibly into the next memory page. Although these
578 * instructions will not be executed, software must account for the
579 * prefetch in order to avoid invalid page access faults."
580 */
581 new_cur += 128;
582
583 /*
584 * we should be able to append data without being blocked even the bo
585 * is busy...
586 */
587
588 /* reallocate when the cache is full or busy */
589 if (new_cur > shc->size || shc->busy) {
590 while (new_cur > shc->size)
591 shc->size <<= 1;
592
593 ilo_shader_cache_reset(shc);
594 }
595
596 /* upload now */
597 for (i = 0; i < num_shaders; i++) {
598 if (shaders[i]->cache_seqno != shc->seqno) {
599 /* kernels must be aligned to 64-byte */
600 shc->cur = align(shc->cur, 64);
601 shc->bo->pwrite(shc->bo, shc->cur,
602 shaders[i]->kernel_size, shaders[i]->kernel);
603
604 shaders[i]->cache_seqno = shc->seqno;
605 shaders[i]->cache_offset = shc->cur;
606
607 shc->cur += shaders[i]->kernel_size;
608 }
609 }
610 }