ilo: switch to ilo states for shaders and resources
[mesa.git] / src / gallium / drivers / ilo / ilo_shader.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_parse.h"
29 #include "intel_winsys.h"
30
31 #include "ilo_shader.h"
32
33 /**
34 * Initialize a shader variant.
35 */
36 void
37 ilo_shader_variant_init(struct ilo_shader_variant *variant,
38 const struct ilo_shader_info *info,
39 const struct ilo_context *ilo)
40 {
41 int num_views, i;
42
43 memset(variant, 0, sizeof(*variant));
44
45 switch (info->type) {
46 case PIPE_SHADER_VERTEX:
47 variant->u.vs.rasterizer_discard =
48 ilo->rasterizer->state.rasterizer_discard;
49 variant->u.vs.num_ucps =
50 util_last_bit(ilo->rasterizer->state.clip_plane_enable);
51 break;
52 case PIPE_SHADER_GEOMETRY:
53 variant->u.gs.rasterizer_discard =
54 ilo->rasterizer->state.rasterizer_discard;
55 variant->u.gs.num_inputs = ilo->vs->shader->out.count;
56 for (i = 0; i < ilo->vs->shader->out.count; i++) {
57 variant->u.gs.semantic_names[i] =
58 ilo->vs->shader->out.semantic_names[i];
59 variant->u.gs.semantic_indices[i] =
60 ilo->vs->shader->out.semantic_indices[i];
61 }
62 break;
63 case PIPE_SHADER_FRAGMENT:
64 variant->u.fs.flatshade =
65 (info->has_color_interp && ilo->rasterizer->state.flatshade);
66 variant->u.fs.fb_height = (info->has_pos) ?
67 ilo->fb.state.height : 1;
68 variant->u.fs.num_cbufs = ilo->fb.state.nr_cbufs;
69 break;
70 default:
71 assert(!"unknown shader type");
72 break;
73 }
74
75 num_views = ilo->view[info->type].count;
76 assert(info->num_samplers <= num_views);
77
78 variant->num_sampler_views = info->num_samplers;
79 for (i = 0; i < info->num_samplers; i++) {
80 const struct pipe_sampler_view *view =
81 ilo->view[info->type].states[i];
82 const struct pipe_sampler_state *sampler =
83 ilo->sampler[info->type].states[i];
84
85 if (view) {
86 variant->sampler_view_swizzles[i].r = view->swizzle_r;
87 variant->sampler_view_swizzles[i].g = view->swizzle_g;
88 variant->sampler_view_swizzles[i].b = view->swizzle_b;
89 variant->sampler_view_swizzles[i].a = view->swizzle_a;
90 }
91 else if (info->shadow_samplers & (1 << i)) {
92 variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
93 variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED;
94 variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED;
95 variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE;
96 }
97 else {
98 variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
99 variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN;
100 variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE;
101 variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA;
102 }
103
104 /*
105 * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
106 * the HW wrap mode is set to BRW_TEXCOORDMODE_CLAMP_BORDER, and we need
107 * to manually saturate the texture coordinates.
108 */
109 if (sampler && sampler->min_img_filter != PIPE_TEX_FILTER_NEAREST) {
110 if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP)
111 variant->saturate_tex_coords[0] |= 1 << i;
112 if (sampler->wrap_t == PIPE_TEX_WRAP_CLAMP)
113 variant->saturate_tex_coords[1] |= 1 << i;
114 if (sampler->wrap_r == PIPE_TEX_WRAP_CLAMP)
115 variant->saturate_tex_coords[2] |= 1 << i;
116 }
117 }
118 }
119
120 /**
121 * Guess the shader variant, knowing that the context may still change.
122 */
123 static void
124 ilo_shader_variant_guess(struct ilo_shader_variant *variant,
125 const struct ilo_shader_info *info,
126 const struct ilo_context *ilo)
127 {
128 int i;
129
130 memset(variant, 0, sizeof(*variant));
131
132 switch (info->type) {
133 case PIPE_SHADER_VERTEX:
134 break;
135 case PIPE_SHADER_GEOMETRY:
136 break;
137 case PIPE_SHADER_FRAGMENT:
138 variant->u.fs.flatshade = false;
139 variant->u.fs.fb_height = (info->has_pos) ?
140 ilo->fb.state.height : 1;
141 variant->u.fs.num_cbufs = 1;
142 break;
143 default:
144 assert(!"unknown shader type");
145 break;
146 }
147
148 variant->num_sampler_views = info->num_samplers;
149 for (i = 0; i < info->num_samplers; i++) {
150 if (info->shadow_samplers & (1 << i)) {
151 variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
152 variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED;
153 variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED;
154 variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE;
155 }
156 else {
157 variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
158 variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN;
159 variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE;
160 variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA;
161 }
162 }
163 }
164
165
166 /**
167 * Parse a TGSI instruction for the shader info.
168 */
169 static void
170 ilo_shader_info_parse_inst(struct ilo_shader_info *info,
171 const struct tgsi_full_instruction *inst)
172 {
173 int i;
174
175 /* look for edgeflag passthrough */
176 if (info->edgeflag_out >= 0 &&
177 inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
178 inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
179 inst->Dst[0].Register.Index == info->edgeflag_out) {
180
181 assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
182 info->edgeflag_in = inst->Src[0].Register.Index;
183 }
184
185 if (inst->Instruction.Texture) {
186 bool shadow;
187
188 switch (inst->Texture.Texture) {
189 case TGSI_TEXTURE_SHADOW1D:
190 case TGSI_TEXTURE_SHADOW2D:
191 case TGSI_TEXTURE_SHADOWRECT:
192 case TGSI_TEXTURE_SHADOW1D_ARRAY:
193 case TGSI_TEXTURE_SHADOW2D_ARRAY:
194 case TGSI_TEXTURE_SHADOWCUBE:
195 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
196 shadow = true;
197 break;
198 default:
199 shadow = false;
200 break;
201 }
202
203 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
204 const struct tgsi_full_src_register *src = &inst->Src[i];
205
206 if (src->Register.File == TGSI_FILE_SAMPLER) {
207 const int idx = src->Register.Index;
208
209 if (idx >= info->num_samplers)
210 info->num_samplers = idx + 1;
211
212 if (shadow)
213 info->shadow_samplers |= 1 << idx;
214 }
215 }
216 }
217 }
218
219 /**
220 * Parse a TGSI property for the shader info.
221 */
222 static void
223 ilo_shader_info_parse_prop(struct ilo_shader_info *info,
224 const struct tgsi_full_property *prop)
225 {
226 switch (prop->Property.PropertyName) {
227 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
228 info->fs_color0_writes_all_cbufs = prop->u[0].Data;
229 break;
230 default:
231 break;
232 }
233 }
234
235 /**
236 * Parse a TGSI declaration for the shader info.
237 */
238 static void
239 ilo_shader_info_parse_decl(struct ilo_shader_info *info,
240 const struct tgsi_full_declaration *decl)
241 {
242 switch (decl->Declaration.File) {
243 case TGSI_FILE_INPUT:
244 if (decl->Declaration.Interpolate &&
245 decl->Interp.Interpolate == TGSI_INTERPOLATE_COLOR)
246 info->has_color_interp = true;
247 if (decl->Declaration.Semantic &&
248 decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
249 info->has_pos = true;
250 break;
251 case TGSI_FILE_OUTPUT:
252 if (decl->Declaration.Semantic &&
253 decl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG)
254 info->edgeflag_out = decl->Range.First;
255 break;
256 case TGSI_FILE_SYSTEM_VALUE:
257 if (decl->Declaration.Semantic &&
258 decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID)
259 info->has_instanceid = true;
260 if (decl->Declaration.Semantic &&
261 decl->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
262 info->has_vertexid = true;
263 break;
264 default:
265 break;
266 }
267 }
268
269 static void
270 ilo_shader_info_parse_tokens(struct ilo_shader_info *info)
271 {
272 struct tgsi_parse_context parse;
273
274 info->edgeflag_in = -1;
275 info->edgeflag_out = -1;
276
277 tgsi_parse_init(&parse, info->tokens);
278 while (!tgsi_parse_end_of_tokens(&parse)) {
279 const union tgsi_full_token *token;
280
281 tgsi_parse_token(&parse);
282 token = &parse.FullToken;
283
284 switch (token->Token.Type) {
285 case TGSI_TOKEN_TYPE_DECLARATION:
286 ilo_shader_info_parse_decl(info, &token->FullDeclaration);
287 break;
288 case TGSI_TOKEN_TYPE_INSTRUCTION:
289 ilo_shader_info_parse_inst(info, &token->FullInstruction);
290 break;
291 case TGSI_TOKEN_TYPE_PROPERTY:
292 ilo_shader_info_parse_prop(info, &token->FullProperty);
293 break;
294 default:
295 break;
296 }
297 }
298 tgsi_parse_free(&parse);
299 }
300
301 /**
302 * Create a shader state.
303 */
304 struct ilo_shader_state *
305 ilo_shader_state_create(const struct ilo_context *ilo,
306 int type, const void *templ)
307 {
308 struct ilo_shader_state *state;
309 struct ilo_shader_variant variant;
310
311 state = CALLOC_STRUCT(ilo_shader_state);
312 if (!state)
313 return NULL;
314
315 state->info.dev = ilo->dev;
316 state->info.type = type;
317
318 if (type == PIPE_SHADER_COMPUTE) {
319 const struct pipe_compute_state *c =
320 (const struct pipe_compute_state *) templ;
321
322 state->info.tokens = tgsi_dup_tokens(c->prog);
323 state->info.compute.req_local_mem = c->req_local_mem;
324 state->info.compute.req_private_mem = c->req_private_mem;
325 state->info.compute.req_input_mem = c->req_input_mem;
326 }
327 else {
328 const struct pipe_shader_state *s =
329 (const struct pipe_shader_state *) templ;
330
331 state->info.tokens = tgsi_dup_tokens(s->tokens);
332 state->info.stream_output = s->stream_output;
333 }
334
335 list_inithead(&state->variants);
336
337 ilo_shader_info_parse_tokens(&state->info);
338
339 /* guess and compile now */
340 ilo_shader_variant_guess(&variant, &state->info, ilo);
341 if (!ilo_shader_state_use_variant(state, &variant)) {
342 ilo_shader_state_destroy(state);
343 return NULL;
344 }
345
346 return state;
347 }
348
349 /**
350 * Destroy a shader state.
351 */
352 void
353 ilo_shader_state_destroy(struct ilo_shader_state *state)
354 {
355 struct ilo_shader *sh, *next;
356
357 LIST_FOR_EACH_ENTRY_SAFE(sh, next, &state->variants, list)
358 ilo_shader_destroy(sh);
359
360 FREE((struct tgsi_token *) state->info.tokens);
361 FREE(state);
362 }
363
364 /**
365 * Add a compiled shader to the shader state.
366 */
367 static void
368 ilo_shader_state_add_shader(struct ilo_shader_state *state,
369 struct ilo_shader *sh)
370 {
371 list_add(&sh->list, &state->variants);
372 state->num_variants++;
373 state->total_size += sh->kernel_size;
374 }
375
376 /**
377 * Remove a compiled shader from the shader state.
378 */
379 static void
380 ilo_shader_state_remove_shader(struct ilo_shader_state *state,
381 struct ilo_shader *sh)
382 {
383 list_del(&sh->list);
384 state->num_variants--;
385 state->total_size -= sh->kernel_size;
386 }
387
388 /**
389 * Garbage collect shader variants in the shader state.
390 */
391 static void
392 ilo_shader_state_gc(struct ilo_shader_state *state)
393 {
394 /* activate when the variants take up more than 4KiB of space */
395 const int limit = 4 * 1024;
396 struct ilo_shader *sh, *next;
397
398 if (state->total_size < limit)
399 return;
400
401 /* remove from the tail as the most recently ones are at the head */
402 LIST_FOR_EACH_ENTRY_SAFE_REV(sh, next, &state->variants, list) {
403 ilo_shader_state_remove_shader(state, sh);
404 ilo_shader_destroy(sh);
405
406 if (state->total_size <= limit / 2)
407 break;
408 }
409 }
410
411 /**
412 * Search for a shader variant.
413 */
414 static struct ilo_shader *
415 ilo_shader_state_search_variant(struct ilo_shader_state *state,
416 const struct ilo_shader_variant *variant)
417 {
418 struct ilo_shader *sh = NULL, *tmp;
419
420 LIST_FOR_EACH_ENTRY(tmp, &state->variants, list) {
421 if (memcmp(&tmp->variant, variant, sizeof(*variant)) == 0) {
422 sh = tmp;
423 break;
424 }
425 }
426
427 return sh;
428 }
429
430 /**
431 * Add a shader variant to the shader state.
432 */
433 struct ilo_shader *
434 ilo_shader_state_add_variant(struct ilo_shader_state *state,
435 const struct ilo_shader_variant *variant)
436 {
437 struct ilo_shader *sh;
438
439 sh = ilo_shader_state_search_variant(state, variant);
440 if (sh)
441 return sh;
442
443 ilo_shader_state_gc(state);
444
445 switch (state->info.type) {
446 case PIPE_SHADER_VERTEX:
447 sh = ilo_shader_compile_vs(state, variant);
448 break;
449 case PIPE_SHADER_FRAGMENT:
450 sh = ilo_shader_compile_fs(state, variant);
451 break;
452 case PIPE_SHADER_GEOMETRY:
453 sh = ilo_shader_compile_gs(state, variant);
454 break;
455 case PIPE_SHADER_COMPUTE:
456 sh = ilo_shader_compile_cs(state, variant);
457 break;
458 default:
459 sh = NULL;
460 break;
461 }
462 if (!sh) {
463 assert(!"failed to compile shader");
464 return NULL;
465 }
466
467 sh->variant = *variant;
468
469 ilo_shader_state_add_shader(state, sh);
470
471 return sh;
472 }
473
474 /**
475 * Update state->shader to point to a variant. If the variant does not exist,
476 * it will be added first.
477 */
478 bool
479 ilo_shader_state_use_variant(struct ilo_shader_state *state,
480 const struct ilo_shader_variant *variant)
481 {
482 struct ilo_shader *sh;
483
484 sh = ilo_shader_state_add_variant(state, variant);
485 if (!sh)
486 return false;
487
488 /* move to head */
489 if (state->variants.next != &sh->list) {
490 list_del(&sh->list);
491 list_add(&sh->list, &state->variants);
492 }
493
494 state->shader = sh;
495
496 return true;
497 }
498
499 /**
500 * Reset the shader cache.
501 */
502 static void
503 ilo_shader_cache_reset(struct ilo_shader_cache *shc)
504 {
505 if (shc->bo)
506 shc->bo->unreference(shc->bo);
507
508 shc->bo = shc->winsys->alloc_buffer(shc->winsys,
509 "shader cache", shc->size, 0);
510 shc->busy = false;
511 shc->cur = 0;
512 shc->seqno++;
513 if (!shc->seqno)
514 shc->seqno = 1;
515 }
516
517 /**
518 * Create a shader cache. A shader cache is a bo holding all compiled shaders.
519 * When the bo is full, a larger bo is allocated and all cached shaders are
520 * invalidated. This is how outdated shaders get dropped. Active shaders
521 * will be added to the new bo when used.
522 */
523 struct ilo_shader_cache *
524 ilo_shader_cache_create(struct intel_winsys *winsys)
525 {
526 struct ilo_shader_cache *shc;
527
528 shc = CALLOC_STRUCT(ilo_shader_cache);
529 if (!shc)
530 return NULL;
531
532 shc->winsys = winsys;
533 /* initial cache size */
534 shc->size = 4096;
535
536 ilo_shader_cache_reset(shc);
537
538 return shc;
539 }
540
541 /**
542 * Destroy a shader cache.
543 */
544 void
545 ilo_shader_cache_destroy(struct ilo_shader_cache *shc)
546 {
547 if (shc->bo)
548 shc->bo->unreference(shc->bo);
549
550 FREE(shc);
551 }
552
553 /**
554 * Add shaders to the cache. This may invalidate all other shaders in the
555 * cache.
556 */
557 void
558 ilo_shader_cache_set(struct ilo_shader_cache *shc,
559 struct ilo_shader **shaders,
560 int num_shaders)
561 {
562 int new_cur, i;
563
564 /* calculate the space needed */
565 new_cur = shc->cur;
566 for (i = 0; i < num_shaders; i++) {
567 if (shaders[i]->cache_seqno != shc->seqno)
568 new_cur = align(new_cur, 64) + shaders[i]->kernel_size;
569 }
570
571 /* all shaders are already in the cache */
572 if (new_cur == shc->cur)
573 return;
574
575 /*
576 * From the Sandy Bridge PRM, volume 4 part 2, page 112:
577 *
578 * "Due to prefetch of the instruction stream, the EUs may attempt to
579 * access up to 8 instructions (128 bytes) beyond the end of the kernel
580 * program - possibly into the next memory page. Although these
581 * instructions will not be executed, software must account for the
582 * prefetch in order to avoid invalid page access faults."
583 */
584 new_cur += 128;
585
586 /*
587 * we should be able to append data without being blocked even the bo
588 * is busy...
589 */
590
591 /* reallocate when the cache is full or busy */
592 if (new_cur > shc->size || shc->busy) {
593 while (new_cur > shc->size)
594 shc->size <<= 1;
595
596 ilo_shader_cache_reset(shc);
597 }
598
599 /* upload now */
600 for (i = 0; i < num_shaders; i++) {
601 if (shaders[i]->cache_seqno != shc->seqno) {
602 /* kernels must be aligned to 64-byte */
603 shc->cur = align(shc->cur, 64);
604 shc->bo->pwrite(shc->bo, shc->cur,
605 shaders[i]->kernel_size, shaders[i]->kernel);
606
607 shaders[i]->cache_seqno = shc->seqno;
608 shaders[i]->cache_offset = shc->cur;
609
610 shc->cur += shaders[i]->kernel_size;
611 }
612 }
613 }