ilo: preliminary GEN 7.5 support
[mesa.git] / src / gallium / drivers / ilo / ilo_shader.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_parse.h"
29 #include "intel_winsys.h"
30 #include "brw_defines.h" /* for SBE setup */
31
32 #include "shader/ilo_shader_internal.h"
33 #include "ilo_state.h"
34 #include "ilo_shader.h"
35
struct ilo_shader_cache {
   struct list_head shaders;  /* shaders whose variants are all uploaded */
   struct list_head changed;  /* shaders changed/added since the last upload */
};
40
41 /**
42 * Create a shader cache. A shader cache can manage shaders and upload them
43 * to a bo as a whole.
44 */
45 struct ilo_shader_cache *
46 ilo_shader_cache_create(void)
47 {
48 struct ilo_shader_cache *shc;
49
50 shc = CALLOC_STRUCT(ilo_shader_cache);
51 if (!shc)
52 return NULL;
53
54 list_inithead(&shc->shaders);
55 list_inithead(&shc->changed);
56
57 return shc;
58 }
59
/**
 * Destroy a shader cache.
 *
 * Only the cache object itself is freed; the shader states on its lists are
 * owned by their creators and are not touched here.
 */
void
ilo_shader_cache_destroy(struct ilo_shader_cache *shc)
{
   FREE(shc);
}
68
/**
 * Add a shader to the cache.
 */
void
ilo_shader_cache_add(struct ilo_shader_cache *shc,
                     struct ilo_shader_state *shader)
{
   struct ilo_shader *sh;

   shader->cache = shc;
   /* mark every variant for (re-)upload */
   LIST_FOR_EACH_ENTRY(sh, &shader->variants, list)
      sh->uploaded = false;

   /* queue on the changed list until the next upload */
   list_add(&shader->list, &shc->changed);
}
84
85 /**
86 * Remove a shader from the cache.
87 */
88 void
89 ilo_shader_cache_remove(struct ilo_shader_cache *shc,
90 struct ilo_shader_state *shader)
91 {
92 list_del(&shader->list);
93 shader->cache = NULL;
94 }
95
96 /**
97 * Notify the cache that a managed shader has changed.
98 */
99 static void
100 ilo_shader_cache_notify_change(struct ilo_shader_cache *shc,
101 struct ilo_shader_state *shader)
102 {
103 if (shader->cache == shc) {
104 list_del(&shader->list);
105 list_add(&shader->list, &shc->changed);
106 }
107 }
108
/**
 * Upload a managed shader to the bo.
 *
 * Writes the kernels of all (or, when \p incremental is true, only the
 * not-yet-uploaded) variants of \p shader into \p bo starting at \p offset.
 * Returns the number of bytes consumed from \p offset, or -1 on write
 * failure.
 */
static int
ilo_shader_cache_upload_shader(struct ilo_shader_cache *shc,
                               struct ilo_shader_state *shader,
                               struct intel_bo *bo, unsigned offset,
                               bool incremental)
{
   const unsigned base = offset;
   struct ilo_shader *sh;

   LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
      int err;

      if (incremental && sh->uploaded)
         continue;

      /* kernels must be aligned to 64-byte */
      offset = align(offset, 64);

      err = intel_bo_pwrite(bo, offset, sh->kernel_size, sh->kernel);
      if (unlikely(err))
         return -1;

      /* remember where this variant lives so 3DSTATE_* can point at it */
      sh->uploaded = true;
      sh->cache_offset = offset;

      offset += sh->kernel_size;
   }

   return (int) (offset - base);
}
142
143 /**
144 * Similar to ilo_shader_cache_upload(), except no upload happens.
145 */
146 static int
147 ilo_shader_cache_get_upload_size(struct ilo_shader_cache *shc,
148 unsigned offset,
149 bool incremental)
150 {
151 const unsigned base = offset;
152 struct ilo_shader_state *shader;
153
154 if (!incremental) {
155 LIST_FOR_EACH_ENTRY(shader, &shc->shaders, list) {
156 struct ilo_shader *sh;
157
158 /* see ilo_shader_cache_upload_shader() */
159 LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
160 if (!incremental || !sh->uploaded)
161 offset = align(offset, 64) + sh->kernel_size;
162 }
163 }
164 }
165
166 LIST_FOR_EACH_ENTRY(shader, &shc->changed, list) {
167 struct ilo_shader *sh;
168
169 /* see ilo_shader_cache_upload_shader() */
170 LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
171 if (!incremental || !sh->uploaded)
172 offset = align(offset, 64) + sh->kernel_size;
173 }
174 }
175
176 /*
177 * From the Sandy Bridge PRM, volume 4 part 2, page 112:
178 *
179 * "Due to prefetch of the instruction stream, the EUs may attempt to
180 * access up to 8 instructions (128 bytes) beyond the end of the
181 * kernel program - possibly into the next memory page. Although
182 * these instructions will not be executed, software must account for
183 * the prefetch in order to avoid invalid page access faults."
184 */
185 if (offset > base)
186 offset += 128;
187
188 return (int) (offset - base);
189 }
190
/**
 * Upload managed shaders to the bo.  When incremental is true, only shaders
 * that are changed or added after the last upload are uploaded.
 *
 * When \p bo is NULL, no upload happens and the total upload size is
 * returned instead.  Returns a negative value on upload failure.
 */
int
ilo_shader_cache_upload(struct ilo_shader_cache *shc,
                        struct intel_bo *bo, unsigned offset,
                        bool incremental)
{
   struct ilo_shader_state *shader, *next;
   int size = 0, s;

   /* size query only */
   if (!bo)
      return ilo_shader_cache_get_upload_size(shc, offset, incremental);

   /* a full upload also rewrites the shaders uploaded previously */
   if (!incremental) {
      LIST_FOR_EACH_ENTRY(shader, &shc->shaders, list) {
         s = ilo_shader_cache_upload_shader(shc, shader,
               bo, offset, incremental);
         if (unlikely(s < 0))
            return s;

         size += s;
         offset += s;
      }
   }

   /* SAFE variant: entries are moved off the changed list while iterating */
   LIST_FOR_EACH_ENTRY_SAFE(shader, next, &shc->changed, list) {
      s = ilo_shader_cache_upload_shader(shc, shader,
            bo, offset, incremental);
      if (unlikely(s < 0))
         return s;

      size += s;
      offset += s;

      /* uploaded now; migrate to the shaders list */
      list_del(&shader->list);
      list_add(&shader->list, &shc->shaders);
   }

   return size;
}
233
/**
 * Initialize a shader variant.
 *
 * Captures the pieces of the current context state that affect code
 * generation for a shader of type \p info->type, so that compiled kernels
 * can be matched against future context states.
 */
void
ilo_shader_variant_init(struct ilo_shader_variant *variant,
                        const struct ilo_shader_info *info,
                        const struct ilo_context *ilo)
{
   int num_views, i;

   /* zero everything so variants can be compared with memcmp() */
   memset(variant, 0, sizeof(*variant));

   switch (info->type) {
   case PIPE_SHADER_VERTEX:
      variant->u.vs.rasterizer_discard =
         ilo->rasterizer->state.rasterizer_discard;
      variant->u.vs.num_ucps =
         util_last_bit(ilo->rasterizer->state.clip_plane_enable);
      break;
   case PIPE_SHADER_GEOMETRY:
      variant->u.gs.rasterizer_discard =
         ilo->rasterizer->state.rasterizer_discard;
      /* the GS consumes the bound VS's outputs */
      variant->u.gs.num_inputs = ilo->vs->shader->out.count;
      for (i = 0; i < ilo->vs->shader->out.count; i++) {
         variant->u.gs.semantic_names[i] =
            ilo->vs->shader->out.semantic_names[i];
         variant->u.gs.semantic_indices[i] =
            ilo->vs->shader->out.semantic_indices[i];
      }
      break;
   case PIPE_SHADER_FRAGMENT:
      variant->u.fs.flatshade =
         (info->has_color_interp && ilo->rasterizer->state.flatshade);
      /* fb height is needed to invert the Y coordinate of gl_FragCoord */
      variant->u.fs.fb_height = (info->has_pos) ?
         ilo->fb.state.height : 1;
      variant->u.fs.num_cbufs = ilo->fb.state.nr_cbufs;
      break;
   default:
      assert(!"unknown shader type");
      break;
   }

   /* use PCB unless constant buffer 0 is not in user buffer  */
   if ((ilo->cbuf[info->type].enabled_mask & 0x1) &&
       !ilo->cbuf[info->type].cso[0].user_buffer)
      variant->use_pcb = false;
   else
      variant->use_pcb = true;

   num_views = ilo->view[info->type].count;
   assert(info->num_samplers <= num_views);

   variant->num_sampler_views = info->num_samplers;
   for (i = 0; i < info->num_samplers; i++) {
      const struct pipe_sampler_view *view = ilo->view[info->type].states[i];
      const struct ilo_sampler_cso *sampler = ilo->sampler[info->type].cso[i];

      if (view) {
         /* bake the view's swizzle into the kernel */
         variant->sampler_view_swizzles[i].r = view->swizzle_r;
         variant->sampler_view_swizzles[i].g = view->swizzle_g;
         variant->sampler_view_swizzles[i].b = view->swizzle_b;
         variant->sampler_view_swizzles[i].a = view->swizzle_a;
      }
      else if (info->shadow_samplers & (1 << i)) {
         /* shadow samplers broadcast the comparison result: (r, r, r, 1) */
         variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE;
      }
      else {
         /* no view bound: identity swizzle */
         variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
         variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN;
         variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE;
         variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA;
      }

      /*
       * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
       * the HW wrap mode is set to BRW_TEXCOORDMODE_CLAMP_BORDER, and we need
       * to manually saturate the texture coordinates.
       */
      if (sampler) {
         variant->saturate_tex_coords[0] |= sampler->saturate_s << i;
         variant->saturate_tex_coords[1] |= sampler->saturate_t << i;
         variant->saturate_tex_coords[2] |= sampler->saturate_r << i;
      }
   }
}
322
323 /**
324 * Guess the shader variant, knowing that the context may still change.
325 */
326 static void
327 ilo_shader_variant_guess(struct ilo_shader_variant *variant,
328 const struct ilo_shader_info *info,
329 const struct ilo_context *ilo)
330 {
331 int i;
332
333 memset(variant, 0, sizeof(*variant));
334
335 switch (info->type) {
336 case PIPE_SHADER_VERTEX:
337 break;
338 case PIPE_SHADER_GEOMETRY:
339 break;
340 case PIPE_SHADER_FRAGMENT:
341 variant->u.fs.flatshade = false;
342 variant->u.fs.fb_height = (info->has_pos) ?
343 ilo->fb.state.height : 1;
344 variant->u.fs.num_cbufs = 1;
345 break;
346 default:
347 assert(!"unknown shader type");
348 break;
349 }
350
351 variant->use_pcb = true;
352
353 variant->num_sampler_views = info->num_samplers;
354 for (i = 0; i < info->num_samplers; i++) {
355 if (info->shadow_samplers & (1 << i)) {
356 variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
357 variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_RED;
358 variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_RED;
359 variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ONE;
360 }
361 else {
362 variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_RED;
363 variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_GREEN;
364 variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_BLUE;
365 variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_ALPHA;
366 }
367 }
368 }
369
/**
 * Parse a TGSI instruction for the shader info.
 *
 * Detects edgeflag passthrough and records which sampler units are used and
 * which of them are shadow samplers.
 */
static void
ilo_shader_info_parse_inst(struct ilo_shader_info *info,
                           const struct tgsi_full_instruction *inst)
{
   int i;

   /* look for edgeflag passthrough */
   if (info->edgeflag_out >= 0 &&
       inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
       inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
       inst->Dst[0].Register.Index == info->edgeflag_out) {

      assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
      info->edgeflag_in = inst->Src[0].Register.Index;
   }

   if (inst->Instruction.Texture) {
      bool shadow;

      switch (inst->Texture.Texture) {
      case TGSI_TEXTURE_SHADOW1D:
      case TGSI_TEXTURE_SHADOW2D:
      case TGSI_TEXTURE_SHADOWRECT:
      case TGSI_TEXTURE_SHADOW1D_ARRAY:
      case TGSI_TEXTURE_SHADOW2D_ARRAY:
      case TGSI_TEXTURE_SHADOWCUBE:
      case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
         shadow = true;
         break;
      default:
         shadow = false;
         break;
      }

      /* num_samplers tracks the highest sampler index used, plus one */
      for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
         const struct tgsi_full_src_register *src = &inst->Src[i];

         if (src->Register.File == TGSI_FILE_SAMPLER) {
            const int idx = src->Register.Index;

            if (idx >= info->num_samplers)
               info->num_samplers = idx + 1;

            if (shadow)
               info->shadow_samplers |= 1 << idx;
         }
      }
   }
}
423
424 /**
425 * Parse a TGSI property for the shader info.
426 */
427 static void
428 ilo_shader_info_parse_prop(struct ilo_shader_info *info,
429 const struct tgsi_full_property *prop)
430 {
431 switch (prop->Property.PropertyName) {
432 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
433 info->fs_color0_writes_all_cbufs = prop->u[0].Data;
434 break;
435 default:
436 break;
437 }
438 }
439
440 /**
441 * Parse a TGSI declaration for the shader info.
442 */
443 static void
444 ilo_shader_info_parse_decl(struct ilo_shader_info *info,
445 const struct tgsi_full_declaration *decl)
446 {
447 switch (decl->Declaration.File) {
448 case TGSI_FILE_INPUT:
449 if (decl->Declaration.Interpolate &&
450 decl->Interp.Interpolate == TGSI_INTERPOLATE_COLOR)
451 info->has_color_interp = true;
452 if (decl->Declaration.Semantic &&
453 decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
454 info->has_pos = true;
455 break;
456 case TGSI_FILE_OUTPUT:
457 if (decl->Declaration.Semantic &&
458 decl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG)
459 info->edgeflag_out = decl->Range.First;
460 break;
461 case TGSI_FILE_SYSTEM_VALUE:
462 if (decl->Declaration.Semantic &&
463 decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID)
464 info->has_instanceid = true;
465 if (decl->Declaration.Semantic &&
466 decl->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
467 info->has_vertexid = true;
468 break;
469 default:
470 break;
471 }
472 }
473
/**
 * Walk the TGSI token stream once and fill in the derived fields of
 * \p info (sampler usage, edgeflag registers, special inputs, properties).
 */
static void
ilo_shader_info_parse_tokens(struct ilo_shader_info *info)
{
   struct tgsi_parse_context parse;

   /* -1 means "not present" */
   info->edgeflag_in = -1;
   info->edgeflag_out = -1;

   tgsi_parse_init(&parse, info->tokens);
   while (!tgsi_parse_end_of_tokens(&parse)) {
      const union tgsi_full_token *token;

      tgsi_parse_token(&parse);
      token = &parse.FullToken;

      switch (token->Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         ilo_shader_info_parse_decl(info, &token->FullDeclaration);
         break;
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         ilo_shader_info_parse_inst(info, &token->FullInstruction);
         break;
      case TGSI_TOKEN_TYPE_PROPERTY:
         ilo_shader_info_parse_prop(info, &token->FullProperty);
         break;
      default:
         break;
      }
   }
   tgsi_parse_free(&parse);
}
505
506 /**
507 * Create a shader state.
508 */
509 static struct ilo_shader_state *
510 ilo_shader_state_create(const struct ilo_context *ilo,
511 int type, const void *templ)
512 {
513 struct ilo_shader_state *state;
514 struct ilo_shader_variant variant;
515
516 state = CALLOC_STRUCT(ilo_shader_state);
517 if (!state)
518 return NULL;
519
520 state->info.dev = ilo->dev;
521 state->info.type = type;
522
523 if (type == PIPE_SHADER_COMPUTE) {
524 const struct pipe_compute_state *c =
525 (const struct pipe_compute_state *) templ;
526
527 state->info.tokens = tgsi_dup_tokens(c->prog);
528 state->info.compute.req_local_mem = c->req_local_mem;
529 state->info.compute.req_private_mem = c->req_private_mem;
530 state->info.compute.req_input_mem = c->req_input_mem;
531 }
532 else {
533 const struct pipe_shader_state *s =
534 (const struct pipe_shader_state *) templ;
535
536 state->info.tokens = tgsi_dup_tokens(s->tokens);
537 state->info.stream_output = s->stream_output;
538 }
539
540 list_inithead(&state->variants);
541
542 ilo_shader_info_parse_tokens(&state->info);
543
544 /* guess and compile now */
545 ilo_shader_variant_guess(&variant, &state->info, ilo);
546 if (!ilo_shader_state_use_variant(state, &variant)) {
547 ilo_shader_destroy(state);
548 return NULL;
549 }
550
551 return state;
552 }
553
554 /**
555 * Add a compiled shader to the shader state.
556 */
557 static void
558 ilo_shader_state_add_shader(struct ilo_shader_state *state,
559 struct ilo_shader *sh)
560 {
561 list_add(&sh->list, &state->variants);
562 state->num_variants++;
563 state->total_size += sh->kernel_size;
564
565 if (state->cache)
566 ilo_shader_cache_notify_change(state->cache, state);
567 }
568
569 /**
570 * Remove a compiled shader from the shader state.
571 */
572 static void
573 ilo_shader_state_remove_shader(struct ilo_shader_state *state,
574 struct ilo_shader *sh)
575 {
576 list_del(&sh->list);
577 state->num_variants--;
578 state->total_size -= sh->kernel_size;
579 }
580
/**
 * Garbage collect shader variants in the shader state.
 *
 * When the variants exceed a size threshold, the least recently used ones
 * (kept at the list tail by ilo_shader_state_use_variant()) are destroyed
 * until roughly half the threshold remains.
 */
static void
ilo_shader_state_gc(struct ilo_shader_state *state)
{
   /* activate when the variants take up more than 4KiB of space */
   const int limit = 4 * 1024;
   struct ilo_shader *sh, *next;

   if (state->total_size < limit)
      return;

   /* remove from the tail as the most recently ones are at the head */
   LIST_FOR_EACH_ENTRY_SAFE_REV(sh, next, &state->variants, list) {
      ilo_shader_state_remove_shader(state, sh);
      ilo_shader_destroy_kernel(sh);

      /* stop once we are comfortably under the limit again */
      if (state->total_size <= limit / 2)
         break;
   }
}
603
604 /**
605 * Search for a shader variant.
606 */
607 static struct ilo_shader *
608 ilo_shader_state_search_variant(struct ilo_shader_state *state,
609 const struct ilo_shader_variant *variant)
610 {
611 struct ilo_shader *sh = NULL, *tmp;
612
613 LIST_FOR_EACH_ENTRY(tmp, &state->variants, list) {
614 if (memcmp(&tmp->variant, variant, sizeof(*variant)) == 0) {
615 sh = tmp;
616 break;
617 }
618 }
619
620 return sh;
621 }
622
623 static void
624 copy_so_info(struct ilo_shader *sh,
625 const struct pipe_stream_output_info *so_info)
626 {
627 unsigned i, attr;
628
629 if (!so_info->num_outputs)
630 return;
631
632 sh->so_info = *so_info;
633
634 for (i = 0; i < so_info->num_outputs; i++) {
635 /* figure out which attribute is sourced */
636 for (attr = 0; attr < sh->out.count; attr++) {
637 const int reg_idx = sh->out.register_indices[attr];
638 if (reg_idx == so_info->output[i].register_index)
639 break;
640 }
641
642 if (attr < sh->out.count) {
643 sh->so_info.output[i].register_index = attr;
644 }
645 else {
646 assert(!"stream output an undefined register");
647 sh->so_info.output[i].register_index = 0;
648 }
649
650 /* PSIZE is at W channel */
651 if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
652 assert(so_info->output[i].start_component == 0);
653 assert(so_info->output[i].num_components == 1);
654 sh->so_info.output[i].start_component = 3;
655 }
656 }
657 }
658
659 /**
660 * Add a shader variant to the shader state.
661 */
662 static struct ilo_shader *
663 ilo_shader_state_add_variant(struct ilo_shader_state *state,
664 const struct ilo_shader_variant *variant)
665 {
666 struct ilo_shader *sh;
667
668 switch (state->info.type) {
669 case PIPE_SHADER_VERTEX:
670 sh = ilo_shader_compile_vs(state, variant);
671 break;
672 case PIPE_SHADER_FRAGMENT:
673 sh = ilo_shader_compile_fs(state, variant);
674 break;
675 case PIPE_SHADER_GEOMETRY:
676 sh = ilo_shader_compile_gs(state, variant);
677 break;
678 case PIPE_SHADER_COMPUTE:
679 sh = ilo_shader_compile_cs(state, variant);
680 break;
681 default:
682 sh = NULL;
683 break;
684 }
685 if (!sh) {
686 assert(!"failed to compile shader");
687 return NULL;
688 }
689
690 sh->variant = *variant;
691
692 copy_so_info(sh, &state->info.stream_output);
693
694 ilo_shader_state_add_shader(state, sh);
695
696 return sh;
697 }
698
/**
 * Update state->shader to point to a variant.  If the variant does not exist,
 * it will be added first.
 *
 * Returns false only when a new variant had to be compiled and compilation
 * failed.
 */
bool
ilo_shader_state_use_variant(struct ilo_shader_state *state,
                             const struct ilo_shader_variant *variant)
{
   struct ilo_shader *sh;
   bool construct_cso = false;

   sh = ilo_shader_state_search_variant(state, variant);
   if (!sh) {
      /* reclaim space before adding another variant */
      ilo_shader_state_gc(state);

      sh = ilo_shader_state_add_variant(state, variant);
      if (!sh)
         return false;

      construct_cso = true;
   }

   /* move to head */
   if (state->variants.next != &sh->list) {
      list_del(&sh->list);
      list_add(&sh->list, &state->variants);
   }

   state->shader = sh;

   /* the CSO is built once per variant, on first use */
   if (construct_cso) {
      switch (state->info.type) {
      case PIPE_SHADER_VERTEX:
         ilo_gpe_init_vs_cso(state->info.dev, state, &sh->cso);
         break;
      case PIPE_SHADER_GEOMETRY:
         ilo_gpe_init_gs_cso(state->info.dev, state, &sh->cso);
         break;
      case PIPE_SHADER_FRAGMENT:
         ilo_gpe_init_fs_cso(state->info.dev, state, &sh->cso);
         break;
      default:
         break;
      }
   }

   return true;
}
747
748 struct ilo_shader_state *
749 ilo_shader_create_vs(const struct ilo_dev_info *dev,
750 const struct pipe_shader_state *state,
751 const struct ilo_context *precompile)
752 {
753 struct ilo_shader_state *shader;
754
755 shader = ilo_shader_state_create(precompile, PIPE_SHADER_VERTEX, state);
756
757 /* states used in ilo_shader_variant_init() */
758 shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_VS |
759 ILO_DIRTY_RASTERIZER |
760 ILO_DIRTY_CBUF;
761
762 return shader;
763 }
764
765 struct ilo_shader_state *
766 ilo_shader_create_gs(const struct ilo_dev_info *dev,
767 const struct pipe_shader_state *state,
768 const struct ilo_context *precompile)
769 {
770 struct ilo_shader_state *shader;
771
772 shader = ilo_shader_state_create(precompile, PIPE_SHADER_GEOMETRY, state);
773
774 /* states used in ilo_shader_variant_init() */
775 shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_GS |
776 ILO_DIRTY_VS |
777 ILO_DIRTY_RASTERIZER |
778 ILO_DIRTY_CBUF;
779
780 return shader;
781 }
782
783 struct ilo_shader_state *
784 ilo_shader_create_fs(const struct ilo_dev_info *dev,
785 const struct pipe_shader_state *state,
786 const struct ilo_context *precompile)
787 {
788 struct ilo_shader_state *shader;
789
790 shader = ilo_shader_state_create(precompile, PIPE_SHADER_FRAGMENT, state);
791
792 /* states used in ilo_shader_variant_init() */
793 shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_FS |
794 ILO_DIRTY_RASTERIZER |
795 ILO_DIRTY_FB |
796 ILO_DIRTY_CBUF;
797
798 return shader;
799 }
800
801 struct ilo_shader_state *
802 ilo_shader_create_cs(const struct ilo_dev_info *dev,
803 const struct pipe_compute_state *state,
804 const struct ilo_context *precompile)
805 {
806 struct ilo_shader_state *shader;
807
808 shader = ilo_shader_state_create(precompile, PIPE_SHADER_COMPUTE, state);
809
810 shader->info.non_orthogonal_states = 0;
811
812 return shader;
813 }
814
/**
 * Destroy a shader state.
 *
 * Frees all compiled variants, the duplicated TGSI tokens, and the state
 * object itself.  The caller must remove the state from any shader cache
 * first.
 */
void
ilo_shader_destroy(struct ilo_shader_state *shader)
{
   struct ilo_shader *sh, *next;

   LIST_FOR_EACH_ENTRY_SAFE(sh, next, &shader->variants, list)
      ilo_shader_destroy_kernel(sh);

   /* cast away the const added when the tokens were stored */
   FREE((struct tgsi_token *) shader->info.tokens);
   FREE(shader);
}
829
830 /**
831 * Return the type (PIPE_SHADER_x) of the shader.
832 */
833 int
834 ilo_shader_get_type(const struct ilo_shader_state *shader)
835 {
836 return shader->info.type;
837 }
838
/**
 * Select a kernel for the given context.  This will compile a new kernel if
 * none of the existing kernels work with the context.
 *
 * \param ilo the context
 * \param dirty states of the context that are considered changed
 * \return true if a different kernel is selected
 */
bool
ilo_shader_select_kernel(struct ilo_shader_state *shader,
                         const struct ilo_context *ilo,
                         uint32_t dirty)
{
   const struct ilo_shader * const cur = shader->shader;
   struct ilo_shader_variant variant;

   /* nothing this shader depends on has changed */
   if (!(shader->info.non_orthogonal_states & dirty))
      return false;

   ilo_shader_variant_init(&variant, &shader->info, ilo);
   /* note: a compile failure here leaves the current kernel selected */
   ilo_shader_state_use_variant(shader, &variant);

   return (shader->shader != cur);
}
863
864 static int
865 route_attr(const int *semantics, const int *indices, int len,
866 int semantic, int index)
867 {
868 int i;
869
870 for (i = 0; i < len; i++) {
871 if (semantics[i] == semantic && indices[i] == index)
872 return i;
873 }
874
875 /* failed to match for COLOR, try BCOLOR */
876 if (semantic == TGSI_SEMANTIC_COLOR) {
877 for (i = 0; i < len; i++) {
878 if (semantics[i] == TGSI_SEMANTIC_BCOLOR && indices[i] == index)
879 return i;
880 }
881 }
882
883 return -1;
884 }
885
/**
 * Select a routing for the given source shader and rasterizer state.
 *
 * Builds the attribute swizzle table (3DSTATE_SBE) that maps the source
 * stage's output slots to this kernel's input slots, handling point sprite
 * replacement and two-sided lighting.  The result is cached on the kernel
 * and only rebuilt when the source outputs or sprite-coord state change.
 *
 * \return true if a different routing is selected
 */
bool
ilo_shader_select_kernel_routing(struct ilo_shader_state *shader,
                                 const struct ilo_shader_state *source,
                                 const struct ilo_rasterizer_state *rasterizer)
{
   const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable;
   const bool light_twoside = rasterizer->state.light_twoside;
   struct ilo_shader *kernel = shader->shader;
   struct ilo_kernel_routing *routing = &kernel->routing;
   const int *src_semantics, *src_indices;
   int src_len, max_src_slot;
   int dst_len, dst_slot;

   /* we are constructing 3DSTATE_SBE here */
   assert(shader->info.dev->gen >= ILO_GEN(6) &&
          shader->info.dev->gen <= ILO_GEN(7.5));

   assert(kernel);

   /* no source stage: route the kernel's own inputs straight through */
   if (source) {
      assert(source->shader);
      src_semantics = source->shader->out.semantic_names;
      src_indices = source->shader->out.semantic_indices;
      src_len = source->shader->out.count;
   }
   else {
      src_semantics = kernel->in.semantic_names;
      src_indices = kernel->in.semantic_indices;
      src_len = kernel->in.count;
   }

   /* no change */
   if (kernel->routing_initialized &&
       routing->source_skip + routing->source_len <= src_len &&
       kernel->routing_sprite_coord_enable == sprite_coord_enable &&
       !memcmp(kernel->routing_src_semantics,
          &src_semantics[routing->source_skip],
          sizeof(kernel->routing_src_semantics[0]) * routing->source_len) &&
       !memcmp(kernel->routing_src_indices,
          &src_indices[routing->source_skip],
          sizeof(kernel->routing_src_indices[0]) * routing->source_len))
      return false;

   if (source) {
      /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
      assert(src_semantics[0] == TGSI_SEMANTIC_PSIZE);
      assert(src_semantics[1] == TGSI_SEMANTIC_POSITION);
      routing->source_skip = 2;

      routing->source_len = src_len - routing->source_skip;
      src_semantics += routing->source_skip;
      src_indices += routing->source_skip;
   }
   else {
      routing->source_skip = 0;
      routing->source_len = src_len;
   }

   routing->const_interp_enable = kernel->in.const_interp_enable;
   routing->point_sprite_enable = 0;
   routing->swizzle_enable = false;

   /* the HW swizzle table has a fixed number of entries */
   assert(kernel->in.count <= Elements(routing->swizzles));
   dst_len = MIN2(kernel->in.count, Elements(routing->swizzles));
   max_src_slot = -1;

   for (dst_slot = 0; dst_slot < dst_len; dst_slot++) {
      const int semantic = kernel->in.semantic_names[dst_slot];
      const int index = kernel->in.semantic_indices[dst_slot];
      int src_slot;

      /* GENERIC attributes selected for sprite-coord replacement */
      if (semantic == TGSI_SEMANTIC_GENERIC &&
          (sprite_coord_enable & (1 << index)))
         routing->point_sprite_enable |= 1 << dst_slot;

      if (source) {
         src_slot = route_attr(src_semantics, src_indices,
               routing->source_len, semantic, index);

         /*
          * The source shader stage does not output this attribute.  The value
          * is supposed to be undefined, unless the attribute goes through
          * point sprite replacement or the attribute is
          * TGSI_SEMANTIC_POSITION.  In all cases, we do not care which source
          * attribute is picked.
          *
          * We should update the kernel code and omit the output of
          * TGSI_SEMANTIC_POSITION here.
          */
         if (src_slot < 0)
            src_slot = 0;
      }
      else {
         src_slot = dst_slot;
      }

      routing->swizzles[dst_slot] = src_slot;

      /* use the following slot for two-sided lighting */
      if (semantic == TGSI_SEMANTIC_COLOR && light_twoside &&
          src_slot + 1 < routing->source_len &&
          src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR &&
          src_indices[src_slot + 1] == index) {
         routing->swizzles[dst_slot] |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
            ATTRIBUTE_SWIZZLE_SHIFT;
         src_slot++;
      }

      if (routing->swizzles[dst_slot] != dst_slot)
         routing->swizzle_enable = true;

      if (max_src_slot < src_slot)
         max_src_slot = src_slot;
   }

   /* clear the unused tail of the swizzle table */
   memset(&routing->swizzles[dst_slot], 0, sizeof(routing->swizzles) -
          sizeof(routing->swizzles[0]) * dst_slot);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
    *
    *   "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
    *    0 indicating no Vertex URB data to be read.
    *
    *    This field should be set to the minimum length required to read the
    *    maximum source attribute.  The maximum source attribute is indicated
    *    by the maximum value of the enabled Attribute # Source Attribute if
    *    Attribute Swizzle Enable is set, Number of Output Attributes-1 if
    *    enable is not set.
    *
    *    read_length = ceiling((max_source_attr+1)/2)
    *
    *    [errata] Corruption/Hang possible if length programmed larger than
    *    recommended"
    */
   routing->source_len = max_src_slot + 1;

   /* remember the states of the source */
   kernel->routing_initialized = true;
   kernel->routing_sprite_coord_enable = sprite_coord_enable;
   memcpy(kernel->routing_src_semantics, src_semantics,
          sizeof(kernel->routing_src_semantics[0]) * routing->source_len);
   memcpy(kernel->routing_src_indices, src_indices,
          sizeof(kernel->routing_src_indices[0]) * routing->source_len);

   return true;
}
1038
/**
 * Return the cache offset of the selected kernel.  This must be called after
 * ilo_shader_select_kernel() and ilo_shader_cache_upload().
 */
uint32_t
ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader)
{
   const struct ilo_shader *kernel = shader->shader;

   /* the offset is meaningless until the kernel has been uploaded */
   assert(kernel && kernel->uploaded);

   return kernel->cache_offset;
}
1052
/**
 * Query a kernel parameter for the selected kernel.
 *
 * Parameters outside the selected kernel's stage return whatever the
 * corresponding field holds; unknown parameters assert and return 0.
 */
int
ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
                            enum ilo_kernel_param param)
{
   const struct ilo_shader *kernel = shader->shader;
   int val;

   assert(kernel);

   switch (param) {
   /* stage-independent parameters */
   case ILO_KERNEL_INPUT_COUNT:
      val = kernel->in.count;
      break;
   case ILO_KERNEL_OUTPUT_COUNT:
      val = kernel->out.count;
      break;
   case ILO_KERNEL_URB_DATA_START_REG:
      val = kernel->in.start_grf;
      break;
   case ILO_KERNEL_SKIP_CBUF0_UPLOAD:
      val = kernel->skip_cbuf0_upload;
      break;
   case ILO_KERNEL_PCB_CBUF0_SIZE:
      val = kernel->pcb.cbuf0_size;
      break;

   /* VS parameters */
   case ILO_KERNEL_VS_INPUT_INSTANCEID:
      val = shader->info.has_instanceid;
      break;
   case ILO_KERNEL_VS_INPUT_VERTEXID:
      val = shader->info.has_vertexid;
      break;
   case ILO_KERNEL_VS_INPUT_EDGEFLAG:
      if (shader->info.edgeflag_in >= 0) {
         /* we rely on the state tracker here */
         assert(shader->info.edgeflag_in == kernel->in.count - 1);
         val = true;
      }
      else {
         val = false;
      }
      break;
   case ILO_KERNEL_VS_PCB_UCP_SIZE:
      val = kernel->pcb.clip_state_size;
      break;
   case ILO_KERNEL_VS_GEN6_SO:
      val = kernel->stream_output;
      break;
   case ILO_KERNEL_VS_GEN6_SO_START_REG:
      val = kernel->gs_start_grf;
      break;
   case ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET:
      val = kernel->gs_offsets[0];
      break;
   case ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET:
      val = kernel->gs_offsets[1];
      break;
   case ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET:
      val = kernel->gs_offsets[2];
      break;

   /* GS parameters */
   case ILO_KERNEL_GS_DISCARD_ADJACENCY:
      val = kernel->in.discard_adj;
      break;
   case ILO_KERNEL_GS_GEN6_SVBI_POST_INC:
      val = kernel->svbi_post_inc;
      break;

   /* FS parameters */
   case ILO_KERNEL_FS_INPUT_Z:
   case ILO_KERNEL_FS_INPUT_W:
      val = kernel->in.has_pos;
      break;
   case ILO_KERNEL_FS_OUTPUT_Z:
      val = kernel->out.has_pos;
      break;
   case ILO_KERNEL_FS_USE_KILL:
      val = kernel->has_kill;
      break;
   case ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS:
      val = kernel->in.barycentric_interpolation_mode;
      break;
   case ILO_KERNEL_FS_DISPATCH_16_OFFSET:
      val = 0;
      break;

   default:
      assert(!"unknown kernel parameter");
      val = 0;
      break;
   }

   return val;
}
1149
1150 /**
1151 * Return the CSO of the selected kernel.
1152 */
1153 const struct ilo_shader_cso *
1154 ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader)
1155 {
1156 const struct ilo_shader *kernel = shader->shader;
1157
1158 assert(kernel);
1159
1160 return &kernel->cso;
1161 }
1162
/**
 * Return the SO info of the selected kernel.
 *
 * The register indices have been translated to the kernel's output slot
 * indices by copy_so_info().
 */
const struct pipe_stream_output_info *
ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
{
   const struct ilo_shader *kernel = shader->shader;

   assert(kernel);

   return &kernel->so_info;
}
1175
/**
 * Return the routing info of the selected kernel.
 *
 * Valid only after ilo_shader_select_kernel_routing() has been called for
 * the current kernel.
 */
const struct ilo_kernel_routing *
ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader)
{
   const struct ilo_shader *kernel = shader->shader;

   assert(kernel);

   return &kernel->routing;
}