st/mesa: implement a tgsi on-disk shader cache
[mesa.git] / src / mesa / state_tracker / st_program.c
1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 * Brian Paul
31 */
32
33
34 #include "main/imports.h"
35 #include "main/hash.h"
36 #include "main/mtypes.h"
37 #include "program/prog_parameter.h"
38 #include "program/prog_print.h"
39 #include "program/programopt.h"
40
41 #include "compiler/nir/nir.h"
42
43 #include "pipe/p_context.h"
44 #include "pipe/p_defines.h"
45 #include "pipe/p_shader_tokens.h"
46 #include "draw/draw_context.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "tgsi/tgsi_emulate.h"
49 #include "tgsi/tgsi_parse.h"
50 #include "tgsi/tgsi_ureg.h"
51
52 #include "st_debug.h"
53 #include "st_cb_bitmap.h"
54 #include "st_cb_drawpixels.h"
55 #include "st_context.h"
56 #include "st_tgsi_lower_yuv.h"
57 #include "st_program.h"
58 #include "st_mesa_to_tgsi.h"
59 #include "st_atifs_to_tgsi.h"
60 #include "st_nir.h"
61 #include "st_shader_cache.h"
62 #include "cso_cache/cso_context.h"
63
64
65
66 static void
67 set_affected_state_flags(uint64_t *states,
68 struct gl_program *prog,
69 uint64_t new_constants,
70 uint64_t new_sampler_views,
71 uint64_t new_samplers,
72 uint64_t new_images,
73 uint64_t new_ubos,
74 uint64_t new_ssbos,
75 uint64_t new_atomics)
76 {
77 if (prog->Parameters->NumParameters)
78 *states |= new_constants;
79
80 if (prog->info.num_textures)
81 *states |= new_sampler_views | new_samplers;
82
83 if (prog->info.num_images)
84 *states |= new_images;
85
86 if (prog->info.num_ubos)
87 *states |= new_ubos;
88
89 if (prog->info.num_ssbos)
90 *states |= new_ssbos;
91
92 if (prog->info.num_abos)
93 *states |= new_atomics;
94 }
95
96 /**
97 * This determines which states will be updated when the shader is bound.
98 */
99 void
100 st_set_prog_affected_state_flags(struct gl_program *prog)
101 {
102 uint64_t *states;
103
104 switch (prog->info.stage) {
105 case MESA_SHADER_VERTEX:
106 states = &((struct st_vertex_program*)prog)->affected_states;
107
108 *states = ST_NEW_VS_STATE |
109 ST_NEW_RASTERIZER |
110 ST_NEW_VERTEX_ARRAYS;
111
112 set_affected_state_flags(states, prog,
113 ST_NEW_VS_CONSTANTS,
114 ST_NEW_VS_SAMPLER_VIEWS,
115 ST_NEW_RENDER_SAMPLERS,
116 ST_NEW_VS_IMAGES,
117 ST_NEW_VS_UBOS,
118 ST_NEW_VS_SSBOS,
119 ST_NEW_VS_ATOMICS);
120 break;
121
122 case MESA_SHADER_TESS_CTRL:
123 states = &((struct st_tessctrl_program*)prog)->affected_states;
124
125 *states = ST_NEW_TCS_STATE;
126
127 set_affected_state_flags(states, prog,
128 ST_NEW_TCS_CONSTANTS,
129 ST_NEW_TCS_SAMPLER_VIEWS,
130 ST_NEW_RENDER_SAMPLERS,
131 ST_NEW_TCS_IMAGES,
132 ST_NEW_TCS_UBOS,
133 ST_NEW_TCS_SSBOS,
134 ST_NEW_TCS_ATOMICS);
135 break;
136
137 case MESA_SHADER_TESS_EVAL:
138 states = &((struct st_tesseval_program*)prog)->affected_states;
139
140 *states = ST_NEW_TES_STATE |
141 ST_NEW_RASTERIZER;
142
143 set_affected_state_flags(states, prog,
144 ST_NEW_TES_CONSTANTS,
145 ST_NEW_TES_SAMPLER_VIEWS,
146 ST_NEW_RENDER_SAMPLERS,
147 ST_NEW_TES_IMAGES,
148 ST_NEW_TES_UBOS,
149 ST_NEW_TES_SSBOS,
150 ST_NEW_TES_ATOMICS);
151 break;
152
153 case MESA_SHADER_GEOMETRY:
154 states = &((struct st_geometry_program*)prog)->affected_states;
155
156 *states = ST_NEW_GS_STATE |
157 ST_NEW_RASTERIZER;
158
159 set_affected_state_flags(states, prog,
160 ST_NEW_GS_CONSTANTS,
161 ST_NEW_GS_SAMPLER_VIEWS,
162 ST_NEW_RENDER_SAMPLERS,
163 ST_NEW_GS_IMAGES,
164 ST_NEW_GS_UBOS,
165 ST_NEW_GS_SSBOS,
166 ST_NEW_GS_ATOMICS);
167 break;
168
169 case MESA_SHADER_FRAGMENT:
170 states = &((struct st_fragment_program*)prog)->affected_states;
171
172 /* gl_FragCoord and glDrawPixels always use constants. */
173 *states = ST_NEW_FS_STATE |
174 ST_NEW_SAMPLE_SHADING |
175 ST_NEW_FS_CONSTANTS;
176
177 set_affected_state_flags(states, prog,
178 ST_NEW_FS_CONSTANTS,
179 ST_NEW_FS_SAMPLER_VIEWS,
180 ST_NEW_RENDER_SAMPLERS,
181 ST_NEW_FS_IMAGES,
182 ST_NEW_FS_UBOS,
183 ST_NEW_FS_SSBOS,
184 ST_NEW_FS_ATOMICS);
185 break;
186
187 case MESA_SHADER_COMPUTE:
188 states = &((struct st_compute_program*)prog)->affected_states;
189
190 *states = ST_NEW_CS_STATE;
191
192 set_affected_state_flags(states, prog,
193 ST_NEW_CS_CONSTANTS,
194 ST_NEW_CS_SAMPLER_VIEWS,
195 ST_NEW_CS_SAMPLERS,
196 ST_NEW_CS_IMAGES,
197 ST_NEW_CS_UBOS,
198 ST_NEW_CS_SSBOS,
199 ST_NEW_CS_ATOMICS);
200 break;
201
202 default:
203 unreachable("unhandled shader stage");
204 }
205 }
206
207 /**
208 * Delete a vertex program variant. Note the caller must unlink
209 * the variant from the linked list.
210 */
211 static void
212 delete_vp_variant(struct st_context *st, struct st_vp_variant *vpv)
213 {
214 if (vpv->driver_shader)
215 cso_delete_vertex_shader(st->cso_context, vpv->driver_shader);
216
217 if (vpv->draw_shader)
218 draw_delete_vertex_shader( st->draw, vpv->draw_shader );
219
220 if (((vpv->tgsi.type == PIPE_SHADER_IR_TGSI)) && vpv->tgsi.tokens)
221 ureg_free_tokens(vpv->tgsi.tokens);
222
223 free( vpv );
224 }
225
226
227
228 /**
229 * Clean out any old compilations:
230 */
231 void
232 st_release_vp_variants( struct st_context *st,
233 struct st_vertex_program *stvp )
234 {
235 struct st_vp_variant *vpv;
236
237 for (vpv = stvp->variants; vpv; ) {
238 struct st_vp_variant *next = vpv->next;
239 delete_vp_variant(st, vpv);
240 vpv = next;
241 }
242
243 stvp->variants = NULL;
244
245 if ((stvp->tgsi.type == PIPE_SHADER_IR_TGSI) && stvp->tgsi.tokens) {
246 tgsi_free_tokens(stvp->tgsi.tokens);
247 stvp->tgsi.tokens = NULL;
248 }
249 }
250
251
252
253 /**
254 * Delete a fragment program variant. Note the caller must unlink
255 * the variant from the linked list.
256 */
257 static void
258 delete_fp_variant(struct st_context *st, struct st_fp_variant *fpv)
259 {
260 if (fpv->driver_shader)
261 cso_delete_fragment_shader(st->cso_context, fpv->driver_shader);
262 free(fpv);
263 }
264
265
266 /**
267 * Free all variants of a fragment program.
268 */
269 void
270 st_release_fp_variants(struct st_context *st, struct st_fragment_program *stfp)
271 {
272 struct st_fp_variant *fpv;
273
274 for (fpv = stfp->variants; fpv; ) {
275 struct st_fp_variant *next = fpv->next;
276 delete_fp_variant(st, fpv);
277 fpv = next;
278 }
279
280 stfp->variants = NULL;
281
282 if ((stfp->tgsi.type == PIPE_SHADER_IR_TGSI) && stfp->tgsi.tokens) {
283 ureg_free_tokens(stfp->tgsi.tokens);
284 stfp->tgsi.tokens = NULL;
285 }
286 }
287
288
289 /**
290 * Delete a basic program variant. Note the caller must unlink
291 * the variant from the linked list.
292 */
293 static void
294 delete_basic_variant(struct st_context *st, struct st_basic_variant *v,
295 GLenum target)
296 {
297 if (v->driver_shader) {
298 switch (target) {
299 case GL_TESS_CONTROL_PROGRAM_NV:
300 cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
301 break;
302 case GL_TESS_EVALUATION_PROGRAM_NV:
303 cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
304 break;
305 case GL_GEOMETRY_PROGRAM_NV:
306 cso_delete_geometry_shader(st->cso_context, v->driver_shader);
307 break;
308 case GL_COMPUTE_PROGRAM_NV:
309 cso_delete_compute_shader(st->cso_context, v->driver_shader);
310 break;
311 default:
312 assert(!"this shouldn't occur");
313 }
314 }
315
316 free(v);
317 }
318
319
320 /**
321 * Free all basic program variants.
322 */
323 void
324 st_release_basic_variants(struct st_context *st, GLenum target,
325 struct st_basic_variant **variants,
326 struct pipe_shader_state *tgsi)
327 {
328 struct st_basic_variant *v;
329
330 for (v = *variants; v; ) {
331 struct st_basic_variant *next = v->next;
332 delete_basic_variant(st, v, target);
333 v = next;
334 }
335
336 *variants = NULL;
337
338 if (tgsi->tokens) {
339 ureg_free_tokens(tgsi->tokens);
340 tgsi->tokens = NULL;
341 }
342 }
343
344
345 /**
346 * Free all variants of a compute program.
347 */
348 void
349 st_release_cp_variants(struct st_context *st, struct st_compute_program *stcp)
350 {
351 struct st_basic_variant **variants = &stcp->variants;
352 struct st_basic_variant *v;
353
354 for (v = *variants; v; ) {
355 struct st_basic_variant *next = v->next;
356 delete_basic_variant(st, v, stcp->Base.Target);
357 v = next;
358 }
359
360 *variants = NULL;
361
362 if (stcp->tgsi.prog) {
363 ureg_free_tokens(stcp->tgsi.prog);
364 stcp->tgsi.prog = NULL;
365 }
366 }
367
368 /**
369 * Translate a vertex program.
370 */
371 bool
372 st_translate_vertex_program(struct st_context *st,
373 struct st_vertex_program *stvp)
374 {
375 struct ureg_program *ureg;
376 enum pipe_error error;
377 unsigned num_outputs = 0;
378 unsigned attr;
379 unsigned input_to_index[VERT_ATTRIB_MAX] = {0};
380 unsigned output_slot_to_attr[VARYING_SLOT_MAX] = {0};
381 ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
382 ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
383
384 stvp->num_inputs = 0;
385
386 if (stvp->Base.arb.IsPositionInvariant)
387 _mesa_insert_mvp_code(st->ctx, &stvp->Base);
388
389 /*
390 * Determine number of inputs, the mappings between VERT_ATTRIB_x
391 * and TGSI generic input indexes, plus input attrib semantic info.
392 */
393 for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
394 if ((stvp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
395 input_to_index[attr] = stvp->num_inputs;
396 stvp->index_to_input[stvp->num_inputs] = attr;
397 stvp->num_inputs++;
398 if ((stvp->Base.info.double_inputs_read &
399 BITFIELD64_BIT(attr)) != 0) {
400 /* add placeholder for second part of a double attribute */
401 stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
402 stvp->num_inputs++;
403 }
404 }
405 }
406 /* bit of a hack, presetup potentially unused edgeflag input */
407 input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
408 stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
409
410 /* Compute mapping of vertex program outputs to slots.
411 */
412 for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
413 if ((stvp->Base.info.outputs_written & BITFIELD64_BIT(attr)) == 0) {
414 stvp->result_to_output[attr] = ~0;
415 }
416 else {
417 unsigned slot = num_outputs++;
418
419 stvp->result_to_output[attr] = slot;
420 output_slot_to_attr[slot] = attr;
421
422 switch (attr) {
423 case VARYING_SLOT_POS:
424 output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
425 output_semantic_index[slot] = 0;
426 break;
427 case VARYING_SLOT_COL0:
428 output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
429 output_semantic_index[slot] = 0;
430 break;
431 case VARYING_SLOT_COL1:
432 output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
433 output_semantic_index[slot] = 1;
434 break;
435 case VARYING_SLOT_BFC0:
436 output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
437 output_semantic_index[slot] = 0;
438 break;
439 case VARYING_SLOT_BFC1:
440 output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
441 output_semantic_index[slot] = 1;
442 break;
443 case VARYING_SLOT_FOGC:
444 output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
445 output_semantic_index[slot] = 0;
446 break;
447 case VARYING_SLOT_PSIZ:
448 output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
449 output_semantic_index[slot] = 0;
450 break;
451 case VARYING_SLOT_CLIP_DIST0:
452 output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
453 output_semantic_index[slot] = 0;
454 break;
455 case VARYING_SLOT_CLIP_DIST1:
456 output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
457 output_semantic_index[slot] = 1;
458 break;
459 case VARYING_SLOT_CULL_DIST0:
460 case VARYING_SLOT_CULL_DIST1:
461 /* these should have been lowered by GLSL */
462 assert(0);
463 break;
464 case VARYING_SLOT_EDGE:
465 assert(0);
466 break;
467 case VARYING_SLOT_CLIP_VERTEX:
468 output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
469 output_semantic_index[slot] = 0;
470 break;
471 case VARYING_SLOT_LAYER:
472 output_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
473 output_semantic_index[slot] = 0;
474 break;
475 case VARYING_SLOT_VIEWPORT:
476 output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
477 output_semantic_index[slot] = 0;
478 break;
479
480 case VARYING_SLOT_TEX0:
481 case VARYING_SLOT_TEX1:
482 case VARYING_SLOT_TEX2:
483 case VARYING_SLOT_TEX3:
484 case VARYING_SLOT_TEX4:
485 case VARYING_SLOT_TEX5:
486 case VARYING_SLOT_TEX6:
487 case VARYING_SLOT_TEX7:
488 if (st->needs_texcoord_semantic) {
489 output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
490 output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
491 break;
492 }
493 /* fall through */
494 case VARYING_SLOT_VAR0:
495 default:
496 assert(attr >= VARYING_SLOT_VAR0 ||
497 (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
498 output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
499 output_semantic_index[slot] =
500 st_get_generic_varying_index(st, attr);
501 break;
502 }
503 }
504 }
505 /* similar hack to above, presetup potentially unused edgeflag output */
506 stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
507 output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
508 output_semantic_index[num_outputs] = 0;
509
510 /* ARB_vp: */
511 if (!stvp->glsl_to_tgsi && !stvp->shader_program) {
512 _mesa_remove_output_reads(&stvp->Base, PROGRAM_OUTPUT);
513
514 /* This determines which states will be updated when the assembly
515 * shader is bound.
516 */
517 stvp->affected_states = ST_NEW_VS_STATE |
518 ST_NEW_RASTERIZER |
519 ST_NEW_VERTEX_ARRAYS;
520
521 if (stvp->Base.Parameters->NumParameters)
522 stvp->affected_states |= ST_NEW_VS_CONSTANTS;
523
524 /* No samplers are allowed in ARB_vp. */
525 }
526
527 if (stvp->shader_program) {
528 nir_shader *nir = st_glsl_to_nir(st, &stvp->Base, stvp->shader_program,
529 MESA_SHADER_VERTEX);
530
531 stvp->tgsi.type = PIPE_SHADER_IR_NIR;
532 stvp->tgsi.ir.nir = nir;
533
534 struct gl_program *prog = stvp->shader_program->last_vert_prog;
535 if (prog) {
536 st_translate_stream_output_info2(prog->sh.LinkedTransformFeedback,
537 stvp->result_to_output,
538 &stvp->tgsi.stream_output);
539 }
540
541 return true;
542 }
543
544 ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
545 if (ureg == NULL)
546 return false;
547
548 if (stvp->Base.info.clip_distance_array_size)
549 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
550 stvp->Base.info.clip_distance_array_size);
551 if (stvp->Base.info.cull_distance_array_size)
552 ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
553 stvp->Base.info.cull_distance_array_size);
554
555 if (ST_DEBUG & DEBUG_MESA) {
556 _mesa_print_program(&stvp->Base);
557 _mesa_print_program_parameters(st->ctx, &stvp->Base);
558 debug_printf("\n");
559 }
560
561 if (stvp->glsl_to_tgsi) {
562 error = st_translate_program(st->ctx,
563 PIPE_SHADER_VERTEX,
564 ureg,
565 stvp->glsl_to_tgsi,
566 &stvp->Base,
567 /* inputs */
568 stvp->num_inputs,
569 input_to_index,
570 NULL, /* inputSlotToAttr */
571 NULL, /* input semantic name */
572 NULL, /* input semantic index */
573 NULL, /* interp mode */
574 /* outputs */
575 num_outputs,
576 stvp->result_to_output,
577 output_slot_to_attr,
578 output_semantic_name,
579 output_semantic_index);
580
581 st_translate_stream_output_info(stvp->glsl_to_tgsi,
582 stvp->result_to_output,
583 &stvp->tgsi.stream_output);
584
585 free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
586 } else
587 error = st_translate_mesa_program(st->ctx,
588 PIPE_SHADER_VERTEX,
589 ureg,
590 &stvp->Base,
591 /* inputs */
592 stvp->num_inputs,
593 input_to_index,
594 NULL, /* input semantic name */
595 NULL, /* input semantic index */
596 NULL,
597 /* outputs */
598 num_outputs,
599 stvp->result_to_output,
600 output_semantic_name,
601 output_semantic_index);
602
603 if (error) {
604 debug_printf("%s: failed to translate Mesa program:\n", __func__);
605 _mesa_print_program(&stvp->Base);
606 debug_assert(0);
607 return false;
608 }
609
610 unsigned num_tokens;
611 stvp->tgsi.tokens = ureg_get_tokens(ureg, &num_tokens);
612 ureg_destroy(ureg);
613
614 if (stvp->glsl_to_tgsi) {
615 stvp->glsl_to_tgsi = NULL;
616 st_store_tgsi_in_disk_cache(st, &stvp->Base, NULL, num_tokens);
617 }
618
619 return stvp->tgsi.tokens != NULL;
620 }
621
622 static struct st_vp_variant *
623 st_create_vp_variant(struct st_context *st,
624 struct st_vertex_program *stvp,
625 const struct st_vp_variant_key *key)
626 {
627 struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
628 struct pipe_context *pipe = st->pipe;
629
630 vpv->key = *key;
631 vpv->tgsi.stream_output = stvp->tgsi.stream_output;
632 vpv->num_inputs = stvp->num_inputs;
633
634 if (stvp->tgsi.type == PIPE_SHADER_IR_NIR) {
635 vpv->tgsi.type = PIPE_SHADER_IR_NIR;
636 vpv->tgsi.ir.nir = nir_shader_clone(NULL, stvp->tgsi.ir.nir);
637 if (key->clamp_color)
638 NIR_PASS_V(vpv->tgsi.ir.nir, nir_lower_clamp_color_outputs);
639 if (key->passthrough_edgeflags)
640 NIR_PASS_V(vpv->tgsi.ir.nir, nir_lower_passthrough_edgeflags);
641
642 st_finalize_nir(st, &stvp->Base, vpv->tgsi.ir.nir);
643
644 vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi);
645 /* driver takes ownership of IR: */
646 vpv->tgsi.ir.nir = NULL;
647 return vpv;
648 }
649
650 vpv->tgsi.tokens = tgsi_dup_tokens(stvp->tgsi.tokens);
651
652 /* Emulate features. */
653 if (key->clamp_color || key->passthrough_edgeflags) {
654 const struct tgsi_token *tokens;
655 unsigned flags =
656 (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
657 (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
658
659 tokens = tgsi_emulate(vpv->tgsi.tokens, flags);
660
661 if (tokens) {
662 tgsi_free_tokens(vpv->tgsi.tokens);
663 vpv->tgsi.tokens = tokens;
664
665 if (key->passthrough_edgeflags)
666 vpv->num_inputs++;
667 } else
668 fprintf(stderr, "mesa: cannot emulate deprecated features\n");
669 }
670
671 if (ST_DEBUG & DEBUG_TGSI) {
672 tgsi_dump(vpv->tgsi.tokens, 0);
673 debug_printf("\n");
674 }
675
676 vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi);
677 return vpv;
678 }
679
680
681 /**
682 * Find/create a vertex program variant.
683 */
684 struct st_vp_variant *
685 st_get_vp_variant(struct st_context *st,
686 struct st_vertex_program *stvp,
687 const struct st_vp_variant_key *key)
688 {
689 struct st_vp_variant *vpv;
690
691 /* Search for existing variant */
692 for (vpv = stvp->variants; vpv; vpv = vpv->next) {
693 if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
694 break;
695 }
696 }
697
698 if (!vpv) {
699 /* create now */
700 vpv = st_create_vp_variant(st, stvp, key);
701 if (vpv) {
702 /* insert into list */
703 vpv->next = stvp->variants;
704 stvp->variants = vpv;
705 }
706 }
707
708 return vpv;
709 }
710
711
712 /**
713 * Translate a Mesa fragment shader into a TGSI shader.
714 */
715 bool
716 st_translate_fragment_program(struct st_context *st,
717 struct st_fragment_program *stfp)
718 {
719 GLuint outputMapping[2 * FRAG_RESULT_MAX];
720 GLuint inputMapping[VARYING_SLOT_MAX];
721 GLuint inputSlotToAttr[VARYING_SLOT_MAX];
722 GLuint interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */
723 GLuint attr;
724 GLbitfield64 inputsRead;
725 struct ureg_program *ureg;
726
727 GLboolean write_all = GL_FALSE;
728
729 ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
730 ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
731 uint fs_num_inputs = 0;
732
733 ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
734 ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
735 uint fs_num_outputs = 0;
736
737 memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
738
739 /* Non-GLSL programs: */
740 if (!stfp->glsl_to_tgsi && !stfp->shader_program) {
741 _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
742 if (st->ctx->Const.GLSLFragCoordIsSysVal)
743 _mesa_program_fragment_position_to_sysval(&stfp->Base);
744
745 /* This determines which states will be updated when the assembly
746 * shader is bound.
747 *
748 * fragment.position and glDrawPixels always use constants.
749 */
750 stfp->affected_states = ST_NEW_FS_STATE |
751 ST_NEW_SAMPLE_SHADING |
752 ST_NEW_FS_CONSTANTS;
753
754 if (stfp->ati_fs) {
755 /* Just set them for ATI_fs unconditionally. */
756 stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
757 ST_NEW_RENDER_SAMPLERS;
758 } else {
759 /* ARB_fp */
760 if (stfp->Base.SamplersUsed)
761 stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
762 ST_NEW_RENDER_SAMPLERS;
763 }
764 }
765
766 /*
767 * Convert Mesa program inputs to TGSI input register semantics.
768 */
769 inputsRead = stfp->Base.info.inputs_read;
770 for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
771 if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
772 const GLuint slot = fs_num_inputs++;
773
774 inputMapping[attr] = slot;
775 inputSlotToAttr[slot] = attr;
776
777 switch (attr) {
778 case VARYING_SLOT_POS:
779 input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
780 input_semantic_index[slot] = 0;
781 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
782 break;
783 case VARYING_SLOT_COL0:
784 input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
785 input_semantic_index[slot] = 0;
786 interpMode[slot] = stfp->glsl_to_tgsi ?
787 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
788 break;
789 case VARYING_SLOT_COL1:
790 input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
791 input_semantic_index[slot] = 1;
792 interpMode[slot] = stfp->glsl_to_tgsi ?
793 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
794 break;
795 case VARYING_SLOT_FOGC:
796 input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
797 input_semantic_index[slot] = 0;
798 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
799 break;
800 case VARYING_SLOT_FACE:
801 input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
802 input_semantic_index[slot] = 0;
803 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
804 break;
805 case VARYING_SLOT_PRIMITIVE_ID:
806 input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
807 input_semantic_index[slot] = 0;
808 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
809 break;
810 case VARYING_SLOT_LAYER:
811 input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
812 input_semantic_index[slot] = 0;
813 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
814 break;
815 case VARYING_SLOT_VIEWPORT:
816 input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
817 input_semantic_index[slot] = 0;
818 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
819 break;
820 case VARYING_SLOT_CLIP_DIST0:
821 input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
822 input_semantic_index[slot] = 0;
823 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
824 break;
825 case VARYING_SLOT_CLIP_DIST1:
826 input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
827 input_semantic_index[slot] = 1;
828 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
829 break;
830 case VARYING_SLOT_CULL_DIST0:
831 case VARYING_SLOT_CULL_DIST1:
832 /* these should have been lowered by GLSL */
833 assert(0);
834 break;
835 /* In most cases, there is nothing special about these
836 * inputs, so adopt a convention to use the generic
837 * semantic name and the mesa VARYING_SLOT_ number as the
838 * index.
839 *
840 * All that is required is that the vertex shader labels
841 * its own outputs similarly, and that the vertex shader
842 * generates at least every output required by the
843 * fragment shader plus fixed-function hardware (such as
844 * BFC).
845 *
846 * However, some drivers may need us to identify the PNTC and TEXi
847 * varyings if, for example, their capability to replace them with
848 * sprite coordinates is limited.
849 */
850 case VARYING_SLOT_PNTC:
851 if (st->needs_texcoord_semantic) {
852 input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
853 input_semantic_index[slot] = 0;
854 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
855 break;
856 }
857 /* fall through */
858 case VARYING_SLOT_TEX0:
859 case VARYING_SLOT_TEX1:
860 case VARYING_SLOT_TEX2:
861 case VARYING_SLOT_TEX3:
862 case VARYING_SLOT_TEX4:
863 case VARYING_SLOT_TEX5:
864 case VARYING_SLOT_TEX6:
865 case VARYING_SLOT_TEX7:
866 if (st->needs_texcoord_semantic) {
867 input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
868 input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
869 interpMode[slot] = stfp->glsl_to_tgsi ?
870 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
871 break;
872 }
873 /* fall through */
874 case VARYING_SLOT_VAR0:
875 default:
876 /* Semantic indices should be zero-based because drivers may choose
877 * to assign a fixed slot determined by that index.
878 * This is useful because ARB_separate_shader_objects uses location
879 * qualifiers for linkage, and if the semantic index corresponds to
880 * these locations, linkage passes in the driver become unecessary.
881 *
882 * If needs_texcoord_semantic is true, no semantic indices will be
883 * consumed for the TEXi varyings, and we can base the locations of
884 * the user varyings on VAR0. Otherwise, we use TEX0 as base index.
885 */
886 assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
887 (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
888 input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
889 input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
890 if (attr == VARYING_SLOT_PNTC)
891 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
892 else {
893 interpMode[slot] = stfp->glsl_to_tgsi ?
894 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
895 }
896 break;
897 }
898 }
899 else {
900 inputMapping[attr] = -1;
901 }
902 }
903
904 /*
905 * Semantics and mapping for outputs
906 */
907 GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
908
909 /* if z is written, emit that first */
910 if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
911 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
912 fs_output_semantic_index[fs_num_outputs] = 0;
913 outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
914 fs_num_outputs++;
915 outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
916 }
917
918 if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
919 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
920 fs_output_semantic_index[fs_num_outputs] = 0;
921 outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
922 fs_num_outputs++;
923 outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
924 }
925
926 if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
927 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
928 fs_output_semantic_index[fs_num_outputs] = 0;
929 outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
930 fs_num_outputs++;
931 outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
932 }
933
934 /* handle remaining outputs (color) */
935 for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
936 const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
937 stfp->Base.SecondaryOutputsWritten;
938 const unsigned loc = attr % FRAG_RESULT_MAX;
939
940 if (written & BITFIELD64_BIT(loc)) {
941 switch (loc) {
942 case FRAG_RESULT_DEPTH:
943 case FRAG_RESULT_STENCIL:
944 case FRAG_RESULT_SAMPLE_MASK:
945 /* handled above */
946 assert(0);
947 break;
948 case FRAG_RESULT_COLOR:
949 write_all = GL_TRUE; /* fallthrough */
950 default: {
951 int index;
952 assert(loc == FRAG_RESULT_COLOR ||
953 (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
954
955 index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
956
957 if (attr >= FRAG_RESULT_MAX) {
958 /* Secondary color for dual source blending. */
959 assert(index == 0);
960 index++;
961 }
962
963 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
964 fs_output_semantic_index[fs_num_outputs] = index;
965 outputMapping[attr] = fs_num_outputs;
966 break;
967 }
968 }
969
970 fs_num_outputs++;
971 }
972 }
973
974 if (stfp->shader_program) {
975 nir_shader *nir = st_glsl_to_nir(st, &stfp->Base, stfp->shader_program,
976 MESA_SHADER_FRAGMENT);
977
978 stfp->tgsi.type = PIPE_SHADER_IR_NIR;
979 stfp->tgsi.ir.nir = nir;
980
981 return true;
982 }
983
984 ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
985 if (ureg == NULL)
986 return false;
987
988 if (ST_DEBUG & DEBUG_MESA) {
989 _mesa_print_program(&stfp->Base);
990 _mesa_print_program_parameters(st->ctx, &stfp->Base);
991 debug_printf("\n");
992 }
993 if (write_all == GL_TRUE)
994 ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
995
996 if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
997 switch (stfp->Base.info.fs.depth_layout) {
998 case FRAG_DEPTH_LAYOUT_ANY:
999 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1000 TGSI_FS_DEPTH_LAYOUT_ANY);
1001 break;
1002 case FRAG_DEPTH_LAYOUT_GREATER:
1003 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1004 TGSI_FS_DEPTH_LAYOUT_GREATER);
1005 break;
1006 case FRAG_DEPTH_LAYOUT_LESS:
1007 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1008 TGSI_FS_DEPTH_LAYOUT_LESS);
1009 break;
1010 case FRAG_DEPTH_LAYOUT_UNCHANGED:
1011 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1012 TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1013 break;
1014 default:
1015 assert(0);
1016 }
1017 }
1018
1019 if (stfp->glsl_to_tgsi) {
1020 st_translate_program(st->ctx,
1021 PIPE_SHADER_FRAGMENT,
1022 ureg,
1023 stfp->glsl_to_tgsi,
1024 &stfp->Base,
1025 /* inputs */
1026 fs_num_inputs,
1027 inputMapping,
1028 inputSlotToAttr,
1029 input_semantic_name,
1030 input_semantic_index,
1031 interpMode,
1032 /* outputs */
1033 fs_num_outputs,
1034 outputMapping,
1035 NULL,
1036 fs_output_semantic_name,
1037 fs_output_semantic_index);
1038
1039 free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1040 } else if (stfp->ati_fs)
1041 st_translate_atifs_program(ureg,
1042 stfp->ati_fs,
1043 &stfp->Base,
1044 /* inputs */
1045 fs_num_inputs,
1046 inputMapping,
1047 input_semantic_name,
1048 input_semantic_index,
1049 interpMode,
1050 /* outputs */
1051 fs_num_outputs,
1052 outputMapping,
1053 fs_output_semantic_name,
1054 fs_output_semantic_index);
1055 else
1056 st_translate_mesa_program(st->ctx,
1057 PIPE_SHADER_FRAGMENT,
1058 ureg,
1059 &stfp->Base,
1060 /* inputs */
1061 fs_num_inputs,
1062 inputMapping,
1063 input_semantic_name,
1064 input_semantic_index,
1065 interpMode,
1066 /* outputs */
1067 fs_num_outputs,
1068 outputMapping,
1069 fs_output_semantic_name,
1070 fs_output_semantic_index);
1071
1072 unsigned num_tokens;
1073 stfp->tgsi.tokens = ureg_get_tokens(ureg, &num_tokens);
1074 ureg_destroy(ureg);
1075
1076 if (stfp->glsl_to_tgsi) {
1077 stfp->glsl_to_tgsi = NULL;
1078 st_store_tgsi_in_disk_cache(st, &stfp->Base, NULL, num_tokens);
1079 }
1080
1081 return stfp->tgsi.tokens != NULL;
1082 }
1083
1084 static struct st_fp_variant *
1085 st_create_fp_variant(struct st_context *st,
1086 struct st_fragment_program *stfp,
1087 const struct st_fp_variant_key *key)
1088 {
1089 struct pipe_context *pipe = st->pipe;
1090 struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1091 struct pipe_shader_state tgsi = {0};
1092 struct gl_program_parameter_list *params = stfp->Base.Parameters;
1093 static const gl_state_index texcoord_state[STATE_LENGTH] =
1094 { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1095 static const gl_state_index scale_state[STATE_LENGTH] =
1096 { STATE_INTERNAL, STATE_PT_SCALE };
1097 static const gl_state_index bias_state[STATE_LENGTH] =
1098 { STATE_INTERNAL, STATE_PT_BIAS };
1099
1100 if (!variant)
1101 return NULL;
1102
1103 if (stfp->tgsi.type == PIPE_SHADER_IR_NIR) {
1104 tgsi.type = PIPE_SHADER_IR_NIR;
1105 tgsi.ir.nir = nir_shader_clone(NULL, stfp->tgsi.ir.nir);
1106
1107 if (key->clamp_color)
1108 NIR_PASS_V(tgsi.ir.nir, nir_lower_clamp_color_outputs);
1109
1110 if (key->persample_shading) {
1111 nir_shader *shader = tgsi.ir.nir;
1112 nir_foreach_variable(var, &shader->inputs)
1113 var->data.sample = true;
1114 }
1115
1116 assert(!(key->bitmap && key->drawpixels));
1117
1118 /* glBitmap */
1119 if (key->bitmap) {
1120 nir_lower_bitmap_options options = {0};
1121
1122 variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1123 options.sampler = variant->bitmap_sampler;
1124 options.swizzle_xxxx = (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM);
1125
1126 NIR_PASS_V(tgsi.ir.nir, nir_lower_bitmap, &options);
1127 }
1128
1129 /* glDrawPixels (color only) */
1130 if (key->drawpixels) {
1131 nir_lower_drawpixels_options options = {{0}};
1132 unsigned samplers_used = stfp->Base.SamplersUsed;
1133
1134 /* Find the first unused slot. */
1135 variant->drawpix_sampler = ffs(~samplers_used) - 1;
1136 options.drawpix_sampler = variant->drawpix_sampler;
1137 samplers_used |= (1 << variant->drawpix_sampler);
1138
1139 options.pixel_maps = key->pixelMaps;
1140 if (key->pixelMaps) {
1141 variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1142 options.pixelmap_sampler = variant->pixelmap_sampler;
1143 }
1144
1145 options.scale_and_bias = key->scaleAndBias;
1146 if (key->scaleAndBias) {
1147 _mesa_add_state_reference(params, scale_state);
1148 memcpy(options.scale_state_tokens, scale_state,
1149 sizeof(options.scale_state_tokens));
1150 _mesa_add_state_reference(params, bias_state);
1151 memcpy(options.bias_state_tokens, bias_state,
1152 sizeof(options.bias_state_tokens));
1153 }
1154
1155 _mesa_add_state_reference(params, texcoord_state);
1156 memcpy(options.texcoord_state_tokens, texcoord_state,
1157 sizeof(options.texcoord_state_tokens));
1158
1159 NIR_PASS_V(tgsi.ir.nir, nir_lower_drawpixels, &options);
1160 }
1161
1162 if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv)) {
1163 nir_lower_tex_options options = {0};
1164 options.lower_y_uv_external = key->external.lower_nv12;
1165 options.lower_y_u_v_external = key->external.lower_iyuv;
1166 NIR_PASS_V(tgsi.ir.nir, nir_lower_tex, &options);
1167 }
1168
1169 st_finalize_nir(st, &stfp->Base, tgsi.ir.nir);
1170
1171 if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv)) {
1172 /* This pass needs to happen *after* nir_lower_sampler */
1173 NIR_PASS_V(tgsi.ir.nir, st_nir_lower_tex_src_plane,
1174 ~stfp->Base.SamplersUsed,
1175 key->external.lower_nv12,
1176 key->external.lower_iyuv);
1177 }
1178
1179 variant->driver_shader = pipe->create_fs_state(pipe, &tgsi);
1180 variant->key = *key;
1181
1182 return variant;
1183 }
1184
1185 tgsi.tokens = stfp->tgsi.tokens;
1186
1187 assert(!(key->bitmap && key->drawpixels));
1188
1189 /* Fix texture targets and add fog for ATI_fs */
1190 if (stfp->ati_fs) {
1191 const struct tgsi_token *tokens = st_fixup_atifs(tgsi.tokens, key);
1192
1193 if (tokens)
1194 tgsi.tokens = tokens;
1195 else
1196 fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1197 }
1198
1199 /* Emulate features. */
1200 if (key->clamp_color || key->persample_shading) {
1201 const struct tgsi_token *tokens;
1202 unsigned flags =
1203 (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1204 (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1205
1206 tokens = tgsi_emulate(tgsi.tokens, flags);
1207
1208 if (tokens) {
1209 if (tgsi.tokens != stfp->tgsi.tokens)
1210 tgsi_free_tokens(tgsi.tokens);
1211 tgsi.tokens = tokens;
1212 } else
1213 fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1214 }
1215
1216 /* glBitmap */
1217 if (key->bitmap) {
1218 const struct tgsi_token *tokens;
1219
1220 variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1221
1222 tokens = st_get_bitmap_shader(tgsi.tokens,
1223 st->internal_target,
1224 variant->bitmap_sampler,
1225 st->needs_texcoord_semantic,
1226 st->bitmap.tex_format ==
1227 PIPE_FORMAT_L8_UNORM);
1228
1229 if (tokens) {
1230 if (tgsi.tokens != stfp->tgsi.tokens)
1231 tgsi_free_tokens(tgsi.tokens);
1232 tgsi.tokens = tokens;
1233 } else
1234 fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1235 }
1236
1237 /* glDrawPixels (color only) */
1238 if (key->drawpixels) {
1239 const struct tgsi_token *tokens;
1240 unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1241
1242 /* Find the first unused slot. */
1243 variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1244
1245 if (key->pixelMaps) {
1246 unsigned samplers_used = stfp->Base.SamplersUsed |
1247 (1 << variant->drawpix_sampler);
1248
1249 variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1250 }
1251
1252 if (key->scaleAndBias) {
1253 scale_const = _mesa_add_state_reference(params, scale_state);
1254 bias_const = _mesa_add_state_reference(params, bias_state);
1255 }
1256
1257 texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1258
1259 tokens = st_get_drawpix_shader(tgsi.tokens,
1260 st->needs_texcoord_semantic,
1261 key->scaleAndBias, scale_const,
1262 bias_const, key->pixelMaps,
1263 variant->drawpix_sampler,
1264 variant->pixelmap_sampler,
1265 texcoord_const, st->internal_target);
1266
1267 if (tokens) {
1268 if (tgsi.tokens != stfp->tgsi.tokens)
1269 tgsi_free_tokens(tgsi.tokens);
1270 tgsi.tokens = tokens;
1271 } else
1272 fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1273 }
1274
1275 if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv)) {
1276 const struct tgsi_token *tokens;
1277
1278 /* samplers inserted would conflict, but this should be unpossible: */
1279 assert(!(key->bitmap || key->drawpixels));
1280
1281 tokens = st_tgsi_lower_yuv(tgsi.tokens,
1282 ~stfp->Base.SamplersUsed,
1283 key->external.lower_nv12,
1284 key->external.lower_iyuv);
1285 if (tokens) {
1286 if (tgsi.tokens != stfp->tgsi.tokens)
1287 tgsi_free_tokens(tgsi.tokens);
1288 tgsi.tokens = tokens;
1289 } else {
1290 fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1291 }
1292 }
1293
1294 if (ST_DEBUG & DEBUG_TGSI) {
1295 tgsi_dump(tgsi.tokens, 0);
1296 debug_printf("\n");
1297 }
1298
1299 /* fill in variant */
1300 variant->driver_shader = pipe->create_fs_state(pipe, &tgsi);
1301 variant->key = *key;
1302
1303 if (tgsi.tokens != stfp->tgsi.tokens)
1304 tgsi_free_tokens(tgsi.tokens);
1305 return variant;
1306 }
1307
1308 /**
1309 * Translate fragment program if needed.
1310 */
1311 struct st_fp_variant *
1312 st_get_fp_variant(struct st_context *st,
1313 struct st_fragment_program *stfp,
1314 const struct st_fp_variant_key *key)
1315 {
1316 struct st_fp_variant *fpv;
1317
1318 /* Search for existing variant */
1319 for (fpv = stfp->variants; fpv; fpv = fpv->next) {
1320 if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1321 break;
1322 }
1323 }
1324
1325 if (!fpv) {
1326 /* create new */
1327 fpv = st_create_fp_variant(st, stfp, key);
1328 if (fpv) {
1329 /* insert into list */
1330 fpv->next = stfp->variants;
1331 stfp->variants = fpv;
1332 }
1333 }
1334
1335 return fpv;
1336 }
1337
1338
1339 /**
1340 * Translate a program. This is common code for geometry and tessellation
1341 * shaders.
1342 */
1343 static void
1344 st_translate_program_common(struct st_context *st,
1345 struct gl_program *prog,
1346 struct glsl_to_tgsi_visitor *glsl_to_tgsi,
1347 struct ureg_program *ureg,
1348 unsigned tgsi_processor,
1349 struct pipe_shader_state *out_state)
1350 {
1351 GLuint inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1352 GLuint inputMapping[VARYING_SLOT_TESS_MAX];
1353 GLuint outputSlotToAttr[VARYING_SLOT_TESS_MAX];
1354 GLuint outputMapping[VARYING_SLOT_TESS_MAX];
1355 GLuint attr;
1356
1357 ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1358 ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1359 uint num_inputs = 0;
1360
1361 ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1362 ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1363 uint num_outputs = 0;
1364
1365 GLint i;
1366
1367 memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1368 memset(inputMapping, 0, sizeof(inputMapping));
1369 memset(outputSlotToAttr, 0, sizeof(outputSlotToAttr));
1370 memset(outputMapping, 0, sizeof(outputMapping));
1371 memset(out_state, 0, sizeof(*out_state));
1372
1373 if (prog->info.clip_distance_array_size)
1374 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1375 prog->info.clip_distance_array_size);
1376 if (prog->info.cull_distance_array_size)
1377 ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1378 prog->info.cull_distance_array_size);
1379
1380 /*
1381 * Convert Mesa program inputs to TGSI input register semantics.
1382 */
1383 for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1384 if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
1385 const GLuint slot = num_inputs++;
1386
1387 inputMapping[attr] = slot;
1388 inputSlotToAttr[slot] = attr;
1389
1390 switch (attr) {
1391 case VARYING_SLOT_PRIMITIVE_ID:
1392 assert(tgsi_processor == PIPE_SHADER_GEOMETRY);
1393 input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
1394 input_semantic_index[slot] = 0;
1395 break;
1396 case VARYING_SLOT_POS:
1397 input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
1398 input_semantic_index[slot] = 0;
1399 break;
1400 case VARYING_SLOT_COL0:
1401 input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
1402 input_semantic_index[slot] = 0;
1403 break;
1404 case VARYING_SLOT_COL1:
1405 input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
1406 input_semantic_index[slot] = 1;
1407 break;
1408 case VARYING_SLOT_FOGC:
1409 input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
1410 input_semantic_index[slot] = 0;
1411 break;
1412 case VARYING_SLOT_CLIP_VERTEX:
1413 input_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
1414 input_semantic_index[slot] = 0;
1415 break;
1416 case VARYING_SLOT_CLIP_DIST0:
1417 input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
1418 input_semantic_index[slot] = 0;
1419 break;
1420 case VARYING_SLOT_CLIP_DIST1:
1421 input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
1422 input_semantic_index[slot] = 1;
1423 break;
1424 case VARYING_SLOT_CULL_DIST0:
1425 case VARYING_SLOT_CULL_DIST1:
1426 /* these should have been lowered by GLSL */
1427 assert(0);
1428 break;
1429 case VARYING_SLOT_PSIZ:
1430 input_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
1431 input_semantic_index[slot] = 0;
1432 break;
1433 case VARYING_SLOT_TEX0:
1434 case VARYING_SLOT_TEX1:
1435 case VARYING_SLOT_TEX2:
1436 case VARYING_SLOT_TEX3:
1437 case VARYING_SLOT_TEX4:
1438 case VARYING_SLOT_TEX5:
1439 case VARYING_SLOT_TEX6:
1440 case VARYING_SLOT_TEX7:
1441 if (st->needs_texcoord_semantic) {
1442 input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
1443 input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
1444 break;
1445 }
1446 /* fall through */
1447 case VARYING_SLOT_VAR0:
1448 default:
1449 assert(attr >= VARYING_SLOT_VAR0 ||
1450 (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
1451 input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
1452 input_semantic_index[slot] =
1453 st_get_generic_varying_index(st, attr);
1454 break;
1455 }
1456 }
1457 }
1458
1459 /* Also add patch inputs. */
1460 for (attr = 0; attr < 32; attr++) {
1461 if (prog->info.patch_inputs_read & (1u << attr)) {
1462 GLuint slot = num_inputs++;
1463 GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1464
1465 inputMapping[patch_attr] = slot;
1466 inputSlotToAttr[slot] = patch_attr;
1467 input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1468 input_semantic_index[slot] = attr;
1469 }
1470 }
1471
1472 /* initialize output semantics to defaults */
1473 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1474 output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1475 output_semantic_index[i] = 0;
1476 }
1477
1478 /*
1479 * Determine number of outputs, the (default) output register
1480 * mapping and the semantic information for each output.
1481 */
1482 for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1483 if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1484 GLuint slot = num_outputs++;
1485
1486 outputMapping[attr] = slot;
1487 outputSlotToAttr[slot] = attr;
1488
1489 switch (attr) {
1490 case VARYING_SLOT_POS:
1491 assert(slot == 0);
1492 output_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
1493 output_semantic_index[slot] = 0;
1494 break;
1495 case VARYING_SLOT_COL0:
1496 output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
1497 output_semantic_index[slot] = 0;
1498 break;
1499 case VARYING_SLOT_COL1:
1500 output_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
1501 output_semantic_index[slot] = 1;
1502 break;
1503 case VARYING_SLOT_BFC0:
1504 output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
1505 output_semantic_index[slot] = 0;
1506 break;
1507 case VARYING_SLOT_BFC1:
1508 output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR;
1509 output_semantic_index[slot] = 1;
1510 break;
1511 case VARYING_SLOT_FOGC:
1512 output_semantic_name[slot] = TGSI_SEMANTIC_FOG;
1513 output_semantic_index[slot] = 0;
1514 break;
1515 case VARYING_SLOT_PSIZ:
1516 output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE;
1517 output_semantic_index[slot] = 0;
1518 break;
1519 case VARYING_SLOT_CLIP_VERTEX:
1520 output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX;
1521 output_semantic_index[slot] = 0;
1522 break;
1523 case VARYING_SLOT_CLIP_DIST0:
1524 output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
1525 output_semantic_index[slot] = 0;
1526 break;
1527 case VARYING_SLOT_CLIP_DIST1:
1528 output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
1529 output_semantic_index[slot] = 1;
1530 break;
1531 case VARYING_SLOT_CULL_DIST0:
1532 case VARYING_SLOT_CULL_DIST1:
1533 /* these should have been lowered by GLSL */
1534 assert(0);
1535 break;
1536 case VARYING_SLOT_LAYER:
1537 output_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
1538 output_semantic_index[slot] = 0;
1539 break;
1540 case VARYING_SLOT_PRIMITIVE_ID:
1541 output_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
1542 output_semantic_index[slot] = 0;
1543 break;
1544 case VARYING_SLOT_VIEWPORT:
1545 output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
1546 output_semantic_index[slot] = 0;
1547 break;
1548 case VARYING_SLOT_TESS_LEVEL_OUTER:
1549 output_semantic_name[slot] = TGSI_SEMANTIC_TESSOUTER;
1550 output_semantic_index[slot] = 0;
1551 break;
1552 case VARYING_SLOT_TESS_LEVEL_INNER:
1553 output_semantic_name[slot] = TGSI_SEMANTIC_TESSINNER;
1554 output_semantic_index[slot] = 0;
1555 break;
1556 case VARYING_SLOT_TEX0:
1557 case VARYING_SLOT_TEX1:
1558 case VARYING_SLOT_TEX2:
1559 case VARYING_SLOT_TEX3:
1560 case VARYING_SLOT_TEX4:
1561 case VARYING_SLOT_TEX5:
1562 case VARYING_SLOT_TEX6:
1563 case VARYING_SLOT_TEX7:
1564 if (st->needs_texcoord_semantic) {
1565 output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
1566 output_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
1567 break;
1568 }
1569 /* fall through */
1570 case VARYING_SLOT_VAR0:
1571 default:
1572 assert(slot < ARRAY_SIZE(output_semantic_name));
1573 assert(attr >= VARYING_SLOT_VAR0 ||
1574 (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
1575 output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
1576 output_semantic_index[slot] =
1577 st_get_generic_varying_index(st, attr);
1578 break;
1579 }
1580 }
1581 }
1582
1583 /* Also add patch outputs. */
1584 for (attr = 0; attr < 32; attr++) {
1585 if (prog->info.patch_outputs_written & (1u << attr)) {
1586 GLuint slot = num_outputs++;
1587 GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1588
1589 outputMapping[patch_attr] = slot;
1590 outputSlotToAttr[slot] = patch_attr;
1591 output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1592 output_semantic_index[slot] = attr;
1593 }
1594 }
1595
1596 st_translate_program(st->ctx,
1597 tgsi_processor,
1598 ureg,
1599 glsl_to_tgsi,
1600 prog,
1601 /* inputs */
1602 num_inputs,
1603 inputMapping,
1604 inputSlotToAttr,
1605 input_semantic_name,
1606 input_semantic_index,
1607 NULL,
1608 /* outputs */
1609 num_outputs,
1610 outputMapping,
1611 outputSlotToAttr,
1612 output_semantic_name,
1613 output_semantic_index);
1614
1615 unsigned num_tokens;
1616 out_state->tokens = ureg_get_tokens(ureg, &num_tokens);
1617 ureg_destroy(ureg);
1618
1619 st_translate_stream_output_info(glsl_to_tgsi,
1620 outputMapping,
1621 &out_state->stream_output);
1622
1623 st_store_tgsi_in_disk_cache(st, prog, out_state, num_tokens);
1624
1625 if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) {
1626 _mesa_print_program(prog);
1627 debug_printf("\n");
1628 }
1629
1630 if (ST_DEBUG & DEBUG_TGSI) {
1631 tgsi_dump(out_state->tokens, 0);
1632 debug_printf("\n");
1633 }
1634 }
1635
1636
1637 /**
1638 * Translate a geometry program to create a new variant.
1639 */
1640 bool
1641 st_translate_geometry_program(struct st_context *st,
1642 struct st_geometry_program *stgp)
1643 {
1644 struct ureg_program *ureg;
1645
1646 ureg = ureg_create_with_screen(PIPE_SHADER_GEOMETRY, st->pipe->screen);
1647 if (ureg == NULL)
1648 return false;
1649
1650 ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1651 stgp->Base.info.gs.input_primitive);
1652 ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1653 stgp->Base.info.gs.output_primitive);
1654 ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1655 stgp->Base.info.gs.vertices_out);
1656 ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1657 stgp->Base.info.gs.invocations);
1658
1659 st_translate_program_common(st, &stgp->Base, stgp->glsl_to_tgsi, ureg,
1660 PIPE_SHADER_GEOMETRY, &stgp->tgsi);
1661
1662 free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi);
1663 stgp->glsl_to_tgsi = NULL;
1664 return true;
1665 }
1666
1667
1668 /**
1669 * Get/create a basic program variant.
1670 */
1671 struct st_basic_variant *
1672 st_get_basic_variant(struct st_context *st,
1673 unsigned pipe_shader,
1674 struct pipe_shader_state *tgsi,
1675 struct st_basic_variant **variants)
1676 {
1677 struct pipe_context *pipe = st->pipe;
1678 struct st_basic_variant *v;
1679 struct st_basic_variant_key key;
1680
1681 memset(&key, 0, sizeof(key));
1682 key.st = st->has_shareable_shaders ? NULL : st;
1683
1684 /* Search for existing variant */
1685 for (v = *variants; v; v = v->next) {
1686 if (memcmp(&v->key, &key, sizeof(key)) == 0) {
1687 break;
1688 }
1689 }
1690
1691 if (!v) {
1692 /* create new */
1693 v = CALLOC_STRUCT(st_basic_variant);
1694 if (v) {
1695 /* fill in new variant */
1696 switch (pipe_shader) {
1697 case PIPE_SHADER_TESS_CTRL:
1698 v->driver_shader = pipe->create_tcs_state(pipe, tgsi);
1699 break;
1700 case PIPE_SHADER_TESS_EVAL:
1701 v->driver_shader = pipe->create_tes_state(pipe, tgsi);
1702 break;
1703 case PIPE_SHADER_GEOMETRY:
1704 v->driver_shader = pipe->create_gs_state(pipe, tgsi);
1705 break;
1706 default:
1707 assert(!"unhandled shader type");
1708 free(v);
1709 return NULL;
1710 }
1711
1712 v->key = key;
1713
1714 /* insert into list */
1715 v->next = *variants;
1716 *variants = v;
1717 }
1718 }
1719
1720 return v;
1721 }
1722
1723
1724 /**
1725 * Translate a tessellation control program to create a new variant.
1726 */
1727 bool
1728 st_translate_tessctrl_program(struct st_context *st,
1729 struct st_tessctrl_program *sttcp)
1730 {
1731 struct ureg_program *ureg;
1732
1733 ureg = ureg_create_with_screen(PIPE_SHADER_TESS_CTRL, st->pipe->screen);
1734 if (ureg == NULL)
1735 return false;
1736
1737 ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1738 sttcp->Base.info.tess.tcs_vertices_out);
1739
1740 st_translate_program_common(st, &sttcp->Base, sttcp->glsl_to_tgsi, ureg,
1741 PIPE_SHADER_TESS_CTRL, &sttcp->tgsi);
1742
1743 free_glsl_to_tgsi_visitor(sttcp->glsl_to_tgsi);
1744 sttcp->glsl_to_tgsi = NULL;
1745 return true;
1746 }
1747
1748
1749 /**
1750 * Translate a tessellation evaluation program to create a new variant.
1751 */
1752 bool
1753 st_translate_tesseval_program(struct st_context *st,
1754 struct st_tesseval_program *sttep)
1755 {
1756 struct ureg_program *ureg;
1757
1758 ureg = ureg_create_with_screen(PIPE_SHADER_TESS_EVAL, st->pipe->screen);
1759 if (ureg == NULL)
1760 return false;
1761
1762 if (sttep->Base.info.tess.primitive_mode == GL_ISOLINES)
1763 ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1764 else
1765 ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1766 sttep->Base.info.tess.primitive_mode);
1767
1768 STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1769 STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1770 PIPE_TESS_SPACING_FRACTIONAL_ODD);
1771 STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1772 PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1773
1774 ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1775 (sttep->Base.info.tess.spacing + 1) % 3);
1776
1777 ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1778 !sttep->Base.info.tess.ccw);
1779 ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1780 sttep->Base.info.tess.point_mode);
1781
1782 st_translate_program_common(st, &sttep->Base, sttep->glsl_to_tgsi,
1783 ureg, PIPE_SHADER_TESS_EVAL, &sttep->tgsi);
1784
1785 free_glsl_to_tgsi_visitor(sttep->glsl_to_tgsi);
1786 sttep->glsl_to_tgsi = NULL;
1787 return true;
1788 }
1789
1790
1791 /**
1792 * Translate a compute program to create a new variant.
1793 */
1794 bool
1795 st_translate_compute_program(struct st_context *st,
1796 struct st_compute_program *stcp)
1797 {
1798 struct ureg_program *ureg;
1799 struct pipe_shader_state prog;
1800
1801 ureg = ureg_create_with_screen(PIPE_SHADER_COMPUTE, st->pipe->screen);
1802 if (ureg == NULL)
1803 return false;
1804
1805 st_translate_program_common(st, &stcp->Base, stcp->glsl_to_tgsi, ureg,
1806 PIPE_SHADER_COMPUTE, &prog);
1807
1808 stcp->tgsi.ir_type = PIPE_SHADER_IR_TGSI;
1809 stcp->tgsi.prog = prog.tokens;
1810 stcp->tgsi.req_local_mem = stcp->Base.info.cs.shared_size;
1811 stcp->tgsi.req_private_mem = 0;
1812 stcp->tgsi.req_input_mem = 0;
1813
1814 free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi);
1815 stcp->glsl_to_tgsi = NULL;
1816 return true;
1817 }
1818
1819
1820 /**
1821 * Get/create compute program variant.
1822 */
1823 struct st_basic_variant *
1824 st_get_cp_variant(struct st_context *st,
1825 struct pipe_compute_state *tgsi,
1826 struct st_basic_variant **variants)
1827 {
1828 struct pipe_context *pipe = st->pipe;
1829 struct st_basic_variant *v;
1830 struct st_basic_variant_key key;
1831
1832 memset(&key, 0, sizeof(key));
1833 key.st = st->has_shareable_shaders ? NULL : st;
1834
1835 /* Search for existing variant */
1836 for (v = *variants; v; v = v->next) {
1837 if (memcmp(&v->key, &key, sizeof(key)) == 0) {
1838 break;
1839 }
1840 }
1841
1842 if (!v) {
1843 /* create new */
1844 v = CALLOC_STRUCT(st_basic_variant);
1845 if (v) {
1846 /* fill in new variant */
1847 v->driver_shader = pipe->create_compute_state(pipe, tgsi);
1848 v->key = key;
1849
1850 /* insert into list */
1851 v->next = *variants;
1852 *variants = v;
1853 }
1854 }
1855
1856 return v;
1857 }
1858
1859
1860 /**
1861 * Vert/Geom/Frag programs have per-context variants. Free all the
1862 * variants attached to the given program which match the given context.
1863 */
1864 static void
1865 destroy_program_variants(struct st_context *st, struct gl_program *target)
1866 {
1867 if (!target || target == &_mesa_DummyProgram)
1868 return;
1869
1870 switch (target->Target) {
1871 case GL_VERTEX_PROGRAM_ARB:
1872 {
1873 struct st_vertex_program *stvp = (struct st_vertex_program *) target;
1874 struct st_vp_variant *vpv, **prevPtr = &stvp->variants;
1875
1876 for (vpv = stvp->variants; vpv; ) {
1877 struct st_vp_variant *next = vpv->next;
1878 if (vpv->key.st == st) {
1879 /* unlink from list */
1880 *prevPtr = next;
1881 /* destroy this variant */
1882 delete_vp_variant(st, vpv);
1883 }
1884 else {
1885 prevPtr = &vpv->next;
1886 }
1887 vpv = next;
1888 }
1889 }
1890 break;
1891 case GL_FRAGMENT_PROGRAM_ARB:
1892 {
1893 struct st_fragment_program *stfp =
1894 (struct st_fragment_program *) target;
1895 struct st_fp_variant *fpv, **prevPtr = &stfp->variants;
1896
1897 for (fpv = stfp->variants; fpv; ) {
1898 struct st_fp_variant *next = fpv->next;
1899 if (fpv->key.st == st) {
1900 /* unlink from list */
1901 *prevPtr = next;
1902 /* destroy this variant */
1903 delete_fp_variant(st, fpv);
1904 }
1905 else {
1906 prevPtr = &fpv->next;
1907 }
1908 fpv = next;
1909 }
1910 }
1911 break;
1912 case GL_GEOMETRY_PROGRAM_NV:
1913 case GL_TESS_CONTROL_PROGRAM_NV:
1914 case GL_TESS_EVALUATION_PROGRAM_NV:
1915 case GL_COMPUTE_PROGRAM_NV:
1916 {
1917 struct st_geometry_program *gp = (struct st_geometry_program*)target;
1918 struct st_tessctrl_program *tcp = (struct st_tessctrl_program*)target;
1919 struct st_tesseval_program *tep = (struct st_tesseval_program*)target;
1920 struct st_compute_program *cp = (struct st_compute_program*)target;
1921 struct st_basic_variant **variants =
1922 target->Target == GL_GEOMETRY_PROGRAM_NV ? &gp->variants :
1923 target->Target == GL_TESS_CONTROL_PROGRAM_NV ? &tcp->variants :
1924 target->Target == GL_TESS_EVALUATION_PROGRAM_NV ? &tep->variants :
1925 target->Target == GL_COMPUTE_PROGRAM_NV ? &cp->variants :
1926 NULL;
1927 struct st_basic_variant *v, **prevPtr = variants;
1928
1929 for (v = *variants; v; ) {
1930 struct st_basic_variant *next = v->next;
1931 if (v->key.st == st) {
1932 /* unlink from list */
1933 *prevPtr = next;
1934 /* destroy this variant */
1935 delete_basic_variant(st, v, target->Target);
1936 }
1937 else {
1938 prevPtr = &v->next;
1939 }
1940 v = next;
1941 }
1942 }
1943 break;
1944 default:
1945 _mesa_problem(NULL, "Unexpected program target 0x%x in "
1946 "destroy_program_variants_cb()", target->Target);
1947 }
1948 }
1949
1950
1951 /**
1952 * Callback for _mesa_HashWalk. Free all the shader's program variants
1953 * which match the given context.
1954 */
1955 static void
1956 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1957 {
1958 struct st_context *st = (struct st_context *) userData;
1959 struct gl_shader *shader = (struct gl_shader *) data;
1960
1961 switch (shader->Type) {
1962 case GL_SHADER_PROGRAM_MESA:
1963 {
1964 struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1965 GLuint i;
1966
1967 for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1968 if (shProg->_LinkedShaders[i])
1969 destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1970 }
1971 }
1972 break;
1973 case GL_VERTEX_SHADER:
1974 case GL_FRAGMENT_SHADER:
1975 case GL_GEOMETRY_SHADER:
1976 case GL_TESS_CONTROL_SHADER:
1977 case GL_TESS_EVALUATION_SHADER:
1978 case GL_COMPUTE_SHADER:
1979 break;
1980 default:
1981 assert(0);
1982 }
1983 }
1984
1985
1986 /**
1987 * Callback for _mesa_HashWalk. Free all the program variants which match
1988 * the given context.
1989 */
1990 static void
1991 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1992 {
1993 struct st_context *st = (struct st_context *) userData;
1994 struct gl_program *program = (struct gl_program *) data;
1995 destroy_program_variants(st, program);
1996 }
1997
1998
1999 /**
2000 * Walk over all shaders and programs to delete any variants which
2001 * belong to the given context.
2002 * This is called during context tear-down.
2003 */
2004 void
2005 st_destroy_program_variants(struct st_context *st)
2006 {
2007 /* If shaders can be shared with other contexts, the last context will
2008 * call DeleteProgram on all shaders, releasing everything.
2009 */
2010 if (st->has_shareable_shaders)
2011 return;
2012
2013 /* ARB vert/frag program */
2014 _mesa_HashWalk(st->ctx->Shared->Programs,
2015 destroy_program_variants_cb, st);
2016
2017 /* GLSL vert/frag/geom shaders */
2018 _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
2019 destroy_shader_program_variants_cb, st);
2020 }
2021
2022
2023 /**
2024 * For debugging, print/dump the current vertex program.
2025 */
2026 void
2027 st_print_current_vertex_program(void)
2028 {
2029 GET_CURRENT_CONTEXT(ctx);
2030
2031 if (ctx->VertexProgram._Current) {
2032 struct st_vertex_program *stvp =
2033 (struct st_vertex_program *) ctx->VertexProgram._Current;
2034 struct st_vp_variant *stv;
2035
2036 debug_printf("Vertex program %u\n", stvp->Base.Id);
2037
2038 for (stv = stvp->variants; stv; stv = stv->next) {
2039 debug_printf("variant %p\n", stv);
2040 tgsi_dump(stv->tgsi.tokens, 0);
2041 }
2042 }
2043 }
2044
2045
2046 /**
2047 * Compile one shader variant.
2048 */
2049 void
2050 st_precompile_shader_variant(struct st_context *st,
2051 struct gl_program *prog)
2052 {
2053 switch (prog->Target) {
2054 case GL_VERTEX_PROGRAM_ARB: {
2055 struct st_vertex_program *p = (struct st_vertex_program *)prog;
2056 struct st_vp_variant_key key;
2057
2058 memset(&key, 0, sizeof(key));
2059 key.st = st->has_shareable_shaders ? NULL : st;
2060 st_get_vp_variant(st, p, &key);
2061 break;
2062 }
2063
2064 case GL_TESS_CONTROL_PROGRAM_NV: {
2065 struct st_tessctrl_program *p = (struct st_tessctrl_program *)prog;
2066 st_get_basic_variant(st, PIPE_SHADER_TESS_CTRL, &p->tgsi, &p->variants);
2067 break;
2068 }
2069
2070 case GL_TESS_EVALUATION_PROGRAM_NV: {
2071 struct st_tesseval_program *p = (struct st_tesseval_program *)prog;
2072 st_get_basic_variant(st, PIPE_SHADER_TESS_EVAL, &p->tgsi, &p->variants);
2073 break;
2074 }
2075
2076 case GL_GEOMETRY_PROGRAM_NV: {
2077 struct st_geometry_program *p = (struct st_geometry_program *)prog;
2078 st_get_basic_variant(st, PIPE_SHADER_GEOMETRY, &p->tgsi, &p->variants);
2079 break;
2080 }
2081
2082 case GL_FRAGMENT_PROGRAM_ARB: {
2083 struct st_fragment_program *p = (struct st_fragment_program *)prog;
2084 struct st_fp_variant_key key;
2085
2086 memset(&key, 0, sizeof(key));
2087 key.st = st->has_shareable_shaders ? NULL : st;
2088 st_get_fp_variant(st, p, &key);
2089 break;
2090 }
2091
2092 case GL_COMPUTE_PROGRAM_NV: {
2093 struct st_compute_program *p = (struct st_compute_program *)prog;
2094 st_get_cp_variant(st, &p->tgsi, &p->variants);
2095 break;
2096 }
2097
2098 default:
2099 assert(0);
2100 }
2101 }