st/mesa: use a separate VS variant for the draw module
[mesa.git] / src / mesa / state_tracker / st_program.c
1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 * Brian Paul
31 */
32
33
34 #include "main/errors.h"
35 #include "main/imports.h"
36 #include "main/hash.h"
37 #include "main/mtypes.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_print.h"
40 #include "program/prog_to_nir.h"
41 #include "program/programopt.h"
42
43 #include "compiler/nir/nir.h"
44 #include "draw/draw_context.h"
45
46 #include "pipe/p_context.h"
47 #include "pipe/p_defines.h"
48 #include "pipe/p_shader_tokens.h"
49 #include "draw/draw_context.h"
50 #include "tgsi/tgsi_dump.h"
51 #include "tgsi/tgsi_emulate.h"
52 #include "tgsi/tgsi_parse.h"
53 #include "tgsi/tgsi_ureg.h"
54
55 #include "st_debug.h"
56 #include "st_cb_bitmap.h"
57 #include "st_cb_drawpixels.h"
58 #include "st_context.h"
59 #include "st_tgsi_lower_depth_clamp.h"
60 #include "st_tgsi_lower_yuv.h"
61 #include "st_program.h"
62 #include "st_mesa_to_tgsi.h"
63 #include "st_atifs_to_tgsi.h"
64 #include "st_nir.h"
65 #include "st_shader_cache.h"
66 #include "st_util.h"
67 #include "cso_cache/cso_context.h"
68
69
70
71 static void
72 set_affected_state_flags(uint64_t *states,
73 struct gl_program *prog,
74 uint64_t new_constants,
75 uint64_t new_sampler_views,
76 uint64_t new_samplers,
77 uint64_t new_images,
78 uint64_t new_ubos,
79 uint64_t new_ssbos,
80 uint64_t new_atomics)
81 {
82 if (prog->Parameters->NumParameters)
83 *states |= new_constants;
84
85 if (prog->info.num_textures)
86 *states |= new_sampler_views | new_samplers;
87
88 if (prog->info.num_images)
89 *states |= new_images;
90
91 if (prog->info.num_ubos)
92 *states |= new_ubos;
93
94 if (prog->info.num_ssbos)
95 *states |= new_ssbos;
96
97 if (prog->info.num_abos)
98 *states |= new_atomics;
99 }
100
101 /**
102 * This determines which states will be updated when the shader is bound.
103 */
104 void
105 st_set_prog_affected_state_flags(struct gl_program *prog)
106 {
107 uint64_t *states;
108
109 switch (prog->info.stage) {
110 case MESA_SHADER_VERTEX:
111 states = &((struct st_program*)prog)->affected_states;
112
113 *states = ST_NEW_VS_STATE |
114 ST_NEW_RASTERIZER |
115 ST_NEW_VERTEX_ARRAYS;
116
117 set_affected_state_flags(states, prog,
118 ST_NEW_VS_CONSTANTS,
119 ST_NEW_VS_SAMPLER_VIEWS,
120 ST_NEW_VS_SAMPLERS,
121 ST_NEW_VS_IMAGES,
122 ST_NEW_VS_UBOS,
123 ST_NEW_VS_SSBOS,
124 ST_NEW_VS_ATOMICS);
125 break;
126
127 case MESA_SHADER_TESS_CTRL:
128 states = &(st_program(prog))->affected_states;
129
130 *states = ST_NEW_TCS_STATE;
131
132 set_affected_state_flags(states, prog,
133 ST_NEW_TCS_CONSTANTS,
134 ST_NEW_TCS_SAMPLER_VIEWS,
135 ST_NEW_TCS_SAMPLERS,
136 ST_NEW_TCS_IMAGES,
137 ST_NEW_TCS_UBOS,
138 ST_NEW_TCS_SSBOS,
139 ST_NEW_TCS_ATOMICS);
140 break;
141
142 case MESA_SHADER_TESS_EVAL:
143 states = &(st_program(prog))->affected_states;
144
145 *states = ST_NEW_TES_STATE |
146 ST_NEW_RASTERIZER;
147
148 set_affected_state_flags(states, prog,
149 ST_NEW_TES_CONSTANTS,
150 ST_NEW_TES_SAMPLER_VIEWS,
151 ST_NEW_TES_SAMPLERS,
152 ST_NEW_TES_IMAGES,
153 ST_NEW_TES_UBOS,
154 ST_NEW_TES_SSBOS,
155 ST_NEW_TES_ATOMICS);
156 break;
157
158 case MESA_SHADER_GEOMETRY:
159 states = &(st_program(prog))->affected_states;
160
161 *states = ST_NEW_GS_STATE |
162 ST_NEW_RASTERIZER;
163
164 set_affected_state_flags(states, prog,
165 ST_NEW_GS_CONSTANTS,
166 ST_NEW_GS_SAMPLER_VIEWS,
167 ST_NEW_GS_SAMPLERS,
168 ST_NEW_GS_IMAGES,
169 ST_NEW_GS_UBOS,
170 ST_NEW_GS_SSBOS,
171 ST_NEW_GS_ATOMICS);
172 break;
173
174 case MESA_SHADER_FRAGMENT:
175 states = &((struct st_program*)prog)->affected_states;
176
177 /* gl_FragCoord and glDrawPixels always use constants. */
178 *states = ST_NEW_FS_STATE |
179 ST_NEW_SAMPLE_SHADING |
180 ST_NEW_FS_CONSTANTS;
181
182 set_affected_state_flags(states, prog,
183 ST_NEW_FS_CONSTANTS,
184 ST_NEW_FS_SAMPLER_VIEWS,
185 ST_NEW_FS_SAMPLERS,
186 ST_NEW_FS_IMAGES,
187 ST_NEW_FS_UBOS,
188 ST_NEW_FS_SSBOS,
189 ST_NEW_FS_ATOMICS);
190 break;
191
192 case MESA_SHADER_COMPUTE:
193 states = &((struct st_program*)prog)->affected_states;
194
195 *states = ST_NEW_CS_STATE;
196
197 set_affected_state_flags(states, prog,
198 ST_NEW_CS_CONSTANTS,
199 ST_NEW_CS_SAMPLER_VIEWS,
200 ST_NEW_CS_SAMPLERS,
201 ST_NEW_CS_IMAGES,
202 ST_NEW_CS_UBOS,
203 ST_NEW_CS_SSBOS,
204 ST_NEW_CS_ATOMICS);
205 break;
206
207 default:
208 unreachable("unhandled shader stage");
209 }
210 }
211
212
213 /**
214 * Delete a shader variant. Note the caller must unlink the variant from
215 * the linked list.
216 */
217 static void
218 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
219 {
220 if (v->driver_shader) {
221 if (st->has_shareable_shaders || v->st == st) {
222 /* The shader's context matches the calling context, or we
223 * don't care.
224 */
225 switch (target) {
226 case GL_VERTEX_PROGRAM_ARB:
227 cso_delete_vertex_shader(st->cso_context, v->driver_shader);
228 break;
229 case GL_TESS_CONTROL_PROGRAM_NV:
230 cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
231 break;
232 case GL_TESS_EVALUATION_PROGRAM_NV:
233 cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
234 break;
235 case GL_GEOMETRY_PROGRAM_NV:
236 cso_delete_geometry_shader(st->cso_context, v->driver_shader);
237 break;
238 case GL_FRAGMENT_PROGRAM_ARB:
239 cso_delete_fragment_shader(st->cso_context, v->driver_shader);
240 break;
241 case GL_COMPUTE_PROGRAM_NV:
242 cso_delete_compute_shader(st->cso_context, v->driver_shader);
243 break;
244 default:
245 unreachable("bad shader type in delete_basic_variant");
246 }
247 } else {
248 /* We can't delete a shader with a context different from the one
249 * that created it. Add it to the creating context's zombie list.
250 */
251 enum pipe_shader_type type =
252 pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
253
254 st_save_zombie_shader(v->st, type, v->driver_shader);
255 }
256 }
257
258 free(v);
259 }
260
261
262 /**
263 * Free all basic program variants.
264 */
265 void
266 st_release_variants(struct st_context *st, struct st_program *p)
267 {
268 struct st_variant *v;
269
270 for (v = p->variants; v; ) {
271 struct st_variant *next = v->next;
272 delete_variant(st, v, p->Base.Target);
273 v = next;
274 }
275
276 p->variants = NULL;
277
278 if (p->state.tokens) {
279 ureg_free_tokens(p->state.tokens);
280 p->state.tokens = NULL;
281 }
282
283 /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
284 * it has resulted in the driver taking ownership of the NIR. Those
285 * callers should be NULLing out the nir field in any pipe_shader_state
286 * that might have this called in order to indicate that.
287 *
288 * GLSL IR and ARB programs will have set gl_program->nir to the same
289 * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
290 */
291 }
292
/**
 * Run the NIR lowering that must happen once per shader, before any
 * per-variant lowering/finalization is applied.
 *
 * Splits and lowers variable copies, lowers I/O arrays to elements where
 * required by the stage or the backend's compiler options, and assigns
 * vertex-shader input locations.
 */
void
st_finalize_nir_before_variants(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_opt_access);

   /* Copy lowering must precede the array-to-element I/O lowering below. */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   if (nir->options->lower_all_io_to_temps ||
       nir->options->lower_all_io_to_elements ||
       nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* NOTE(review): the boolean presumably restricts the lowering to one
       * side of the I/O (outputs only) for fragment shaders — confirm
       * against the pass's signature.
       */
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   st_nir_assign_vs_in_locations(nir);
}
311
/**
 * Translate ARB (asm) program to NIR
 *
 * Converts a Mesa IR program (ARB assembly) into an optimized NIR shader
 * using the per-stage compiler options, then runs the pre-variant
 * finalization.  Returns the new NIR shader; the caller takes ownership.
 */
static nir_shader *
st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
                         gl_shader_stage stage)
{
   struct pipe_screen *screen = st->pipe->screen;
   const struct gl_shader_compiler_options *options =
      &st->ctx->Const.ShaderCompilerOptions[stage];

   /* Translate to NIR */
   nir_shader *nir = prog_to_nir(prog, options->NirOptions);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
   nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");

   /* Window-position transform must run before system-value lowering. */
   NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS_V(nir, nir_lower_system_values);

   /* Optimise NIR */
   NIR_PASS_V(nir, nir_opt_constant_folding);
   st_nir_opts(nir);
   st_finalize_nir_before_variants(nir);

   /* If the driver allows finalizing twice, do the first (shared) pass now;
    * the per-variant path may finalize again later.
    */
   if (st->allow_st_finalize_nir_twice)
      st_finalize_nir(st, prog, NULL, nir, true);

   nir_validate_shader(nir, "after st/glsl finalize_nir");

   return nir;
}
343
344 void
345 st_prepare_vertex_program(struct st_program *stp)
346 {
347 struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
348
349 stvp->num_inputs = 0;
350 memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
351 memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
352
353 /* Determine number of inputs, the mappings between VERT_ATTRIB_x
354 * and TGSI generic input indexes, plus input attrib semantic info.
355 */
356 for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
357 if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
358 stvp->input_to_index[attr] = stvp->num_inputs;
359 stvp->index_to_input[stvp->num_inputs] = attr;
360 stvp->num_inputs++;
361
362 if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
363 /* add placeholder for second part of a double attribute */
364 stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
365 stvp->num_inputs++;
366 }
367 }
368 }
369 /* pre-setup potentially unused edgeflag input */
370 stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
371 stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
372
373 /* Compute mapping of vertex program outputs to slots. */
374 unsigned num_outputs = 0;
375 for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
376 if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
377 stvp->result_to_output[attr] = num_outputs++;
378 }
379 /* pre-setup potentially unused edgeflag output */
380 stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
381 }
382
383 void
384 st_translate_stream_output_info(struct gl_program *prog)
385 {
386 struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
387 if (!info)
388 return;
389
390 /* Determine the (default) output register mapping for each output. */
391 unsigned num_outputs = 0;
392 ubyte output_mapping[VARYING_SLOT_TESS_MAX];
393 memset(output_mapping, 0, sizeof(output_mapping));
394
395 for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
396 if (prog->info.outputs_written & BITFIELD64_BIT(attr))
397 output_mapping[attr] = num_outputs++;
398 }
399
400 /* Translate stream output info. */
401 struct pipe_stream_output_info *so_info =
402 &((struct st_program*)prog)->state.stream_output;
403
404 for (unsigned i = 0; i < info->NumOutputs; i++) {
405 so_info->output[i].register_index =
406 output_mapping[info->Outputs[i].OutputRegister];
407 so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
408 so_info->output[i].num_components = info->Outputs[i].NumComponents;
409 so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
410 so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
411 so_info->output[i].stream = info->Outputs[i].StreamId;
412 }
413
414 for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
415 so_info->stride[i] = info->Buffers[i].Stride;
416 }
417 so_info->num_outputs = info->NumOutputs;
418 }
419
/**
 * Translate a vertex program.
 *
 * Builds the TGSI token stream for the program (stored in stp->state.tokens)
 * and, if the driver prefers NIR, additionally translates the program to NIR
 * (stored in stp->Base.nir, with state.type set to PIPE_SHADER_IR_NIR).
 *
 * \return true on success, false if ureg creation or translation failed.
 */
bool
st_translate_vertex_program(struct st_context *st,
                            struct st_program *stp)
{
   struct ureg_program *ureg;
   enum pipe_error error;
   unsigned num_outputs = 0;
   unsigned attr;
   ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
   ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};

   /* Position-invariant ARB programs get the ftransform-equivalent MVP
    * code inserted up front.
    */
   if (stp->Base.arb.IsPositionInvariant)
      _mesa_insert_mvp_code(st->ctx, &stp->Base);

   /* Set up input/output slot mappings (input_to_index etc.). */
   st_prepare_vertex_program(stp);

   /* ARB_vp: */
   if (!stp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       */
      stp->affected_states = ST_NEW_VS_STATE |
                             ST_NEW_RASTERIZER |
                             ST_NEW_VERTEX_ARRAYS;

      if (stp->Base.Parameters->NumParameters)
         stp->affected_states |= ST_NEW_VS_CONSTANTS;

      /* No samplers are allowed in ARB_vp. */
   }

   /* Get semantic names and indices. */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
         unsigned slot = num_outputs++;
         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }
   /* pre-setup potentially unused edgeflag output */
   output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
   output_semantic_index[num_outputs] = 0;

   ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
   if (ureg == NULL)
      return false;

   /* Propagate clip/cull distance usage as TGSI properties. */
   if (stp->Base.info.clip_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
                    stp->Base.info.clip_distance_array_size);
   if (stp->Base.info.cull_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
                    stp->Base.info.cull_distance_array_size);

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stp->Base);
      _mesa_print_program_parameters(st->ctx, &stp->Base);
      debug_printf("\n");
   }

   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

   /* GLSL path vs. Mesa IR (ARB assembly) path. */
   if (stp->glsl_to_tgsi) {
      error = st_translate_program(st->ctx,
                                   PIPE_SHADER_VERTEX,
                                   ureg,
                                   stp->glsl_to_tgsi,
                                   &stp->Base,
                                   /* inputs */
                                   stvp->num_inputs,
                                   stvp->input_to_index,
                                   NULL, /* inputSlotToAttr */
                                   NULL, /* input semantic name */
                                   NULL, /* input semantic index */
                                   NULL, /* interp mode */
                                   /* outputs */
                                   num_outputs,
                                   stvp->result_to_output,
                                   output_semantic_name,
                                   output_semantic_index);

      st_translate_stream_output_info(&stp->Base);

      free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   } else
      error = st_translate_mesa_program(st->ctx,
                                        PIPE_SHADER_VERTEX,
                                        ureg,
                                        &stp->Base,
                                        /* inputs */
                                        stvp->num_inputs,
                                        stvp->input_to_index,
                                        NULL, /* input semantic name */
                                        NULL, /* input semantic index */
                                        NULL,
                                        /* outputs */
                                        num_outputs,
                                        stvp->result_to_output,
                                        output_semantic_name,
                                        output_semantic_index);

   if (error) {
      debug_printf("%s: failed to translate Mesa program:\n", __func__);
      _mesa_print_program(&stp->Base);
      debug_assert(0);
      return false;
   }

   /* Tokens are owned by stp and released in st_release_variants(). */
   stp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   if (stp->glsl_to_tgsi) {
      /* Visitor was freed above; clear the pointer and cache the IR. */
      stp->glsl_to_tgsi = NULL;
      st_store_ir_in_disk_cache(st, &stp->Base, false);
   }

   /* Translate to NIR.
    *
    * This must be done after the translation to TGSI is done, because
    * we'll pass the NIR shader to the driver and the TGSI version to
    * the draw module for the select/feedback/rasterpos code.
    */
   if (st->pipe->screen->get_shader_param(st->pipe->screen,
                                          PIPE_SHADER_VERTEX,
                                          PIPE_SHADER_CAP_PREFERRED_IR)) {
      assert(!stp->glsl_to_tgsi);

      nir_shader *nir =
         st_translate_prog_to_nir(st, &stp->Base, MESA_SHADER_VERTEX);

      /* Replace any previous NIR translation. */
      if (stp->Base.nir)
         ralloc_free(stp->Base.nir);
      stp->state.type = PIPE_SHADER_IR_NIR;
      stp->Base.nir = nir;
      return true;
   }

   return stp->state.tokens != NULL;
}
567
/* State reference for gl_DepthRange, used when lowering depth clamp in
 * st_create_vp_variant() to locate the uploaded depth-range constant.
 */
static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };
570
/**
 * Create a vertex program variant for the given key: clone the program's IR
 * (NIR or TGSI), apply the key-driven lowerings (clamp color, edgeflag
 * passthrough, point size, user clip planes / depth clamp), and create the
 * driver or draw-module shader.
 *
 * Returns the new variant; the caller inserts it into the variant list.
 */
static struct st_vp_variant *
st_create_vp_variant(struct st_context *st,
                     struct st_program *stvp,
                     const struct st_common_variant_key *key)
{
   struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = pipe->screen;
   struct pipe_shader_state state = {0};

   /* State reference for the clamped point size constant. */
   static const gl_state_index16 point_size_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
   struct gl_program_parameter_list *params = stvp->Base.Parameters;

   vpv->key = *key;
   vpv->num_inputs = ((struct st_vertex_program*)stvp)->num_inputs;

   state.stream_output = stvp->state.stream_output;

   /* --- NIR path --- */
   if (stvp->state.type == PIPE_SHADER_IR_NIR) {
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      /* Clone so per-variant lowering doesn't mutate the shared shader. */
      state.ir.nir = nir_shader_clone(NULL, stvp->Base.nir);
      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }
      if (key->passthrough_edgeflags) {
         NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
         /* The pass adds an edgeflag input. */
         vpv->num_inputs++;
         finalize = true;
      }

      if (key->lower_point_size) {
         _mesa_add_state_reference(params, point_size_state);
         NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
                    point_size_state);
         finalize = true;
      }

      if (key->lower_ucp) {
         bool can_compact = screen->get_param(screen,
                                              PIPE_CAP_NIR_COMPACT_ARRAYS);

         /* With a user vertex shader, clip planes are in eye space
          * (STATE_CLIPPLANE); otherwise use the internal clip state.
          */
         bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
         gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
         for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
            if (use_eye) {
               clipplane_state[i][0] = STATE_CLIPPLANE;
               clipplane_state[i][1] = i;
            } else {
               clipplane_state[i][0] = STATE_INTERNAL;
               clipplane_state[i][1] = STATE_CLIP_INTERNAL;
               clipplane_state[i][2] = i;
            }
            _mesa_add_state_reference(params, clipplane_state[i]);
         }

         NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
                    true, can_compact, clipplane_state);
         NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
                    nir_shader_get_entrypoint(state.ir.nir), true, false);
         NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
                         true);

         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));
      }

      if (ST_DEBUG & DEBUG_PRINT_IR)
         nir_print_shader(state.ir.nir, stderr);

      /* Draw-module variants go to the software rasterizer; everything
       * else goes to the driver.
       */
      if (key->is_draw_shader)
         vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
      else
         vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);

      return vpv;
   }

   /* --- TGSI path --- */
   state.type = PIPE_SHADER_IR_TGSI;
   state.tokens = tgsi_dup_tokens(stvp->state.tokens);

   /* Emulate features. */
   if (key->clamp_color || key->passthrough_edgeflags) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         /* Replace the duplicated tokens with the emulated ones. */
         tgsi_free_tokens(state.tokens);
         state.tokens = tokens;

         if (key->passthrough_edgeflags)
            vpv->num_inputs++;
      } else
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const =
            _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
                                         key->clip_negative_one_to_one);
      /* The lowering may return the input unchanged; only free on change. */
      if (tokens != state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   if (key->is_draw_shader)
      vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
   else
      vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);

   return vpv;
}
702
703
704 /**
705 * Find/create a vertex program variant.
706 */
707 struct st_vp_variant *
708 st_get_vp_variant(struct st_context *st,
709 struct st_program *stp,
710 const struct st_common_variant_key *key)
711 {
712 struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
713 struct st_vp_variant *vpv;
714
715 /* Search for existing variant */
716 for (vpv = st_vp_variant(stp->variants); vpv;
717 vpv = st_vp_variant(vpv->base.next)) {
718 if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
719 break;
720 }
721 }
722
723 if (!vpv) {
724 /* create now */
725 vpv = st_create_vp_variant(st, stp, key);
726 if (vpv) {
727 vpv->base.st = key->st;
728
729 for (unsigned index = 0; index < vpv->num_inputs; ++index) {
730 unsigned attr = stvp->index_to_input[index];
731 if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
732 continue;
733 vpv->vert_attrib_mask |= 1u << attr;
734 }
735
736 /* insert into list */
737 vpv->base.next = stp->variants;
738 stp->variants = &vpv->base;
739 }
740 }
741
742 return vpv;
743 }
744
745
746 /**
747 * Translate a Mesa fragment shader into a TGSI shader.
748 */
749 bool
750 st_translate_fragment_program(struct st_context *st,
751 struct st_program *stfp)
752 {
753 /* Non-GLSL programs: */
754 if (!stfp->glsl_to_tgsi) {
755 _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
756 if (st->ctx->Const.GLSLFragCoordIsSysVal)
757 _mesa_program_fragment_position_to_sysval(&stfp->Base);
758
759 /* This determines which states will be updated when the assembly
760 * shader is bound.
761 *
762 * fragment.position and glDrawPixels always use constants.
763 */
764 stfp->affected_states = ST_NEW_FS_STATE |
765 ST_NEW_SAMPLE_SHADING |
766 ST_NEW_FS_CONSTANTS;
767
768 if (stfp->ati_fs) {
769 /* Just set them for ATI_fs unconditionally. */
770 stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
771 ST_NEW_FS_SAMPLERS;
772 } else {
773 /* ARB_fp */
774 if (stfp->Base.SamplersUsed)
775 stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
776 ST_NEW_FS_SAMPLERS;
777 }
778
779 /* Translate to NIR. */
780 if (!stfp->ati_fs &&
781 st->pipe->screen->get_shader_param(st->pipe->screen,
782 PIPE_SHADER_FRAGMENT,
783 PIPE_SHADER_CAP_PREFERRED_IR)) {
784 nir_shader *nir =
785 st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
786
787 if (stfp->Base.nir)
788 ralloc_free(stfp->Base.nir);
789 stfp->state.type = PIPE_SHADER_IR_NIR;
790 stfp->Base.nir = nir;
791 return true;
792 }
793 }
794
795 ubyte outputMapping[2 * FRAG_RESULT_MAX];
796 ubyte inputMapping[VARYING_SLOT_MAX];
797 ubyte inputSlotToAttr[VARYING_SLOT_MAX];
798 ubyte interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */
799 GLuint attr;
800 GLbitfield64 inputsRead;
801 struct ureg_program *ureg;
802
803 GLboolean write_all = GL_FALSE;
804
805 ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
806 ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
807 uint fs_num_inputs = 0;
808
809 ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
810 ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
811 uint fs_num_outputs = 0;
812
813 memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
814
815 /*
816 * Convert Mesa program inputs to TGSI input register semantics.
817 */
818 inputsRead = stfp->Base.info.inputs_read;
819 for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
820 if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
821 const GLuint slot = fs_num_inputs++;
822
823 inputMapping[attr] = slot;
824 inputSlotToAttr[slot] = attr;
825
826 switch (attr) {
827 case VARYING_SLOT_POS:
828 input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
829 input_semantic_index[slot] = 0;
830 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
831 break;
832 case VARYING_SLOT_COL0:
833 input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
834 input_semantic_index[slot] = 0;
835 interpMode[slot] = stfp->glsl_to_tgsi ?
836 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
837 break;
838 case VARYING_SLOT_COL1:
839 input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
840 input_semantic_index[slot] = 1;
841 interpMode[slot] = stfp->glsl_to_tgsi ?
842 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
843 break;
844 case VARYING_SLOT_FOGC:
845 input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
846 input_semantic_index[slot] = 0;
847 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
848 break;
849 case VARYING_SLOT_FACE:
850 input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
851 input_semantic_index[slot] = 0;
852 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
853 break;
854 case VARYING_SLOT_PRIMITIVE_ID:
855 input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
856 input_semantic_index[slot] = 0;
857 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
858 break;
859 case VARYING_SLOT_LAYER:
860 input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
861 input_semantic_index[slot] = 0;
862 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
863 break;
864 case VARYING_SLOT_VIEWPORT:
865 input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
866 input_semantic_index[slot] = 0;
867 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
868 break;
869 case VARYING_SLOT_CLIP_DIST0:
870 input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
871 input_semantic_index[slot] = 0;
872 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
873 break;
874 case VARYING_SLOT_CLIP_DIST1:
875 input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
876 input_semantic_index[slot] = 1;
877 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
878 break;
879 case VARYING_SLOT_CULL_DIST0:
880 case VARYING_SLOT_CULL_DIST1:
881 /* these should have been lowered by GLSL */
882 assert(0);
883 break;
884 /* In most cases, there is nothing special about these
885 * inputs, so adopt a convention to use the generic
886 * semantic name and the mesa VARYING_SLOT_ number as the
887 * index.
888 *
889 * All that is required is that the vertex shader labels
890 * its own outputs similarly, and that the vertex shader
891 * generates at least every output required by the
892 * fragment shader plus fixed-function hardware (such as
893 * BFC).
894 *
895 * However, some drivers may need us to identify the PNTC and TEXi
896 * varyings if, for example, their capability to replace them with
897 * sprite coordinates is limited.
898 */
899 case VARYING_SLOT_PNTC:
900 if (st->needs_texcoord_semantic) {
901 input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
902 input_semantic_index[slot] = 0;
903 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
904 break;
905 }
906 /* fall through */
907 case VARYING_SLOT_TEX0:
908 case VARYING_SLOT_TEX1:
909 case VARYING_SLOT_TEX2:
910 case VARYING_SLOT_TEX3:
911 case VARYING_SLOT_TEX4:
912 case VARYING_SLOT_TEX5:
913 case VARYING_SLOT_TEX6:
914 case VARYING_SLOT_TEX7:
915 if (st->needs_texcoord_semantic) {
916 input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
917 input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
918 interpMode[slot] = stfp->glsl_to_tgsi ?
919 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
920 break;
921 }
922 /* fall through */
923 case VARYING_SLOT_VAR0:
924 default:
925 /* Semantic indices should be zero-based because drivers may choose
926 * to assign a fixed slot determined by that index.
927 * This is useful because ARB_separate_shader_objects uses location
928 * qualifiers for linkage, and if the semantic index corresponds to
929 * these locations, linkage passes in the driver become unecessary.
930 *
931 * If needs_texcoord_semantic is true, no semantic indices will be
932 * consumed for the TEXi varyings, and we can base the locations of
933 * the user varyings on VAR0. Otherwise, we use TEX0 as base index.
934 */
935 assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
936 (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
937 input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
938 input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
939 if (attr == VARYING_SLOT_PNTC)
940 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
941 else {
942 interpMode[slot] = stfp->glsl_to_tgsi ?
943 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
944 }
945 break;
946 }
947 }
948 else {
949 inputMapping[attr] = -1;
950 }
951 }
952
953 /*
954 * Semantics and mapping for outputs
955 */
956 GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
957
958 /* if z is written, emit that first */
959 if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
960 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
961 fs_output_semantic_index[fs_num_outputs] = 0;
962 outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
963 fs_num_outputs++;
964 outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
965 }
966
967 if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
968 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
969 fs_output_semantic_index[fs_num_outputs] = 0;
970 outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
971 fs_num_outputs++;
972 outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
973 }
974
975 if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
976 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
977 fs_output_semantic_index[fs_num_outputs] = 0;
978 outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
979 fs_num_outputs++;
980 outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
981 }
982
983 /* handle remaining outputs (color) */
984 for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
985 const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
986 stfp->Base.SecondaryOutputsWritten;
987 const unsigned loc = attr % FRAG_RESULT_MAX;
988
989 if (written & BITFIELD64_BIT(loc)) {
990 switch (loc) {
991 case FRAG_RESULT_DEPTH:
992 case FRAG_RESULT_STENCIL:
993 case FRAG_RESULT_SAMPLE_MASK:
994 /* handled above */
995 assert(0);
996 break;
997 case FRAG_RESULT_COLOR:
998 write_all = GL_TRUE; /* fallthrough */
999 default: {
1000 int index;
1001 assert(loc == FRAG_RESULT_COLOR ||
1002 (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1003
1004 index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1005
1006 if (attr >= FRAG_RESULT_MAX) {
1007 /* Secondary color for dual source blending. */
1008 assert(index == 0);
1009 index++;
1010 }
1011
1012 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1013 fs_output_semantic_index[fs_num_outputs] = index;
1014 outputMapping[attr] = fs_num_outputs;
1015 break;
1016 }
1017 }
1018
1019 fs_num_outputs++;
1020 }
1021 }
1022
1023 ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1024 if (ureg == NULL)
1025 return false;
1026
1027 if (ST_DEBUG & DEBUG_MESA) {
1028 _mesa_print_program(&stfp->Base);
1029 _mesa_print_program_parameters(st->ctx, &stfp->Base);
1030 debug_printf("\n");
1031 }
1032 if (write_all == GL_TRUE)
1033 ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1034
1035 if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1036 switch (stfp->Base.info.fs.depth_layout) {
1037 case FRAG_DEPTH_LAYOUT_ANY:
1038 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1039 TGSI_FS_DEPTH_LAYOUT_ANY);
1040 break;
1041 case FRAG_DEPTH_LAYOUT_GREATER:
1042 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1043 TGSI_FS_DEPTH_LAYOUT_GREATER);
1044 break;
1045 case FRAG_DEPTH_LAYOUT_LESS:
1046 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1047 TGSI_FS_DEPTH_LAYOUT_LESS);
1048 break;
1049 case FRAG_DEPTH_LAYOUT_UNCHANGED:
1050 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1051 TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1052 break;
1053 default:
1054 assert(0);
1055 }
1056 }
1057
1058 if (stfp->glsl_to_tgsi) {
1059 st_translate_program(st->ctx,
1060 PIPE_SHADER_FRAGMENT,
1061 ureg,
1062 stfp->glsl_to_tgsi,
1063 &stfp->Base,
1064 /* inputs */
1065 fs_num_inputs,
1066 inputMapping,
1067 inputSlotToAttr,
1068 input_semantic_name,
1069 input_semantic_index,
1070 interpMode,
1071 /* outputs */
1072 fs_num_outputs,
1073 outputMapping,
1074 fs_output_semantic_name,
1075 fs_output_semantic_index);
1076
1077 free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1078 } else if (stfp->ati_fs)
1079 st_translate_atifs_program(ureg,
1080 stfp->ati_fs,
1081 &stfp->Base,
1082 /* inputs */
1083 fs_num_inputs,
1084 inputMapping,
1085 input_semantic_name,
1086 input_semantic_index,
1087 interpMode,
1088 /* outputs */
1089 fs_num_outputs,
1090 outputMapping,
1091 fs_output_semantic_name,
1092 fs_output_semantic_index);
1093 else
1094 st_translate_mesa_program(st->ctx,
1095 PIPE_SHADER_FRAGMENT,
1096 ureg,
1097 &stfp->Base,
1098 /* inputs */
1099 fs_num_inputs,
1100 inputMapping,
1101 input_semantic_name,
1102 input_semantic_index,
1103 interpMode,
1104 /* outputs */
1105 fs_num_outputs,
1106 outputMapping,
1107 fs_output_semantic_name,
1108 fs_output_semantic_index);
1109
1110 stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1111 ureg_destroy(ureg);
1112
1113 if (stfp->glsl_to_tgsi) {
1114 stfp->glsl_to_tgsi = NULL;
1115 st_store_ir_in_disk_cache(st, &stfp->Base, false);
1116 }
1117
1118 return stfp->state.tokens != NULL;
1119 }
1120
/**
 * Create a fragment-program variant for the given key.
 *
 * Applies every key-dependent lowering/fixup the key requests (color
 * clamping, flat shading, alpha test, two-sided color, per-sample
 * interpolation, glBitmap, glDrawPixels, external/YUV samplers, depth
 * clamp) and then creates the driver shader.  The NIR path clones the
 * master shader and runs NIR passes; the TGSI path rewrites the stored
 * token stream, freeing each intermediate stream it replaces.
 *
 * \param st    context the variant is created for
 * \param stfp  the translated fragment program (NIR or TGSI)
 * \param key   variant key describing the required lowerings
 * \return the new variant, or NULL if the CALLOC failed.  Ownership of
 *         the variant passes to the caller (st_get_fp_variant's list).
 */
static struct st_fp_variant *
st_create_fp_variant(struct st_context *st,
                     struct st_program *stfp,
                     const struct st_fp_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = stfp->Base.Parameters;
   /* State-var tokens used by the lowering passes below; the ones a
    * pass needs are registered in 'params' so the values reach the
    * shader as constants.
    */
   static const gl_state_index16 texcoord_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
   static const gl_state_index16 scale_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_PT_SCALE };
   static const gl_state_index16 bias_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_PT_BIAS };
   static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_ALPHA_REF };

   if (!variant)
      return NULL;

   if (stfp->state.type == PIPE_SHADER_IR_NIR) {
      /* Set whenever a pass changed the shader, so we know another
       * finalize round is required before handing it to the driver.
       */
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      /* Clone so per-variant lowering never touches the master copy. */
      state.ir.nir = nir_shader_clone(NULL, stfp->Base.nir);

      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }

      if (key->lower_flatshade) {
         NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
         finalize = true;
      }

      if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
         /* Alpha-test lowering reads the reference value from a
          * state constant rather than baking it into the shader.
          */
         _mesa_add_state_reference(params, alpha_ref_state);
         NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
                    false, alpha_ref_state);
         finalize = true;
      }

      if (key->lower_two_sided_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
         finalize = true;
      }

      if (key->persample_shading) {
         /* Force every input to be sample-interpolated. */
         nir_shader *shader = state.ir.nir;
         nir_foreach_variable(var, &shader->inputs)
            var->data.sample = true;
         finalize = true;
      }

      /* bitmap and drawpixels variants are mutually exclusive */
      assert(!(key->bitmap && key->drawpixels));

      /* glBitmap */
      if (key->bitmap) {
         nir_lower_bitmap_options options = {0};

         /* Use the first sampler slot the program doesn't occupy. */
         variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
         options.sampler = variant->bitmap_sampler;
         options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;

         NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
         finalize = true;
      }

      /* glDrawPixels (color only) */
      if (key->drawpixels) {
         nir_lower_drawpixels_options options = {{0}};
         unsigned samplers_used = stfp->Base.SamplersUsed;

         /* Find the first unused slot. */
         variant->drawpix_sampler = ffs(~samplers_used) - 1;
         options.drawpix_sampler = variant->drawpix_sampler;
         samplers_used |= (1 << variant->drawpix_sampler);

         options.pixel_maps = key->pixelMaps;
         if (key->pixelMaps) {
            /* pixel-map lookup needs its own, second sampler */
            variant->pixelmap_sampler = ffs(~samplers_used) - 1;
            options.pixelmap_sampler = variant->pixelmap_sampler;
         }

         options.scale_and_bias = key->scaleAndBias;
         if (key->scaleAndBias) {
            _mesa_add_state_reference(params, scale_state);
            memcpy(options.scale_state_tokens, scale_state,
                   sizeof(options.scale_state_tokens));
            _mesa_add_state_reference(params, bias_state);
            memcpy(options.bias_state_tokens, bias_state,
                   sizeof(options.bias_state_tokens));
         }

         _mesa_add_state_reference(params, texcoord_state);
         memcpy(options.texcoord_state_tokens, texcoord_state,
                sizeof(options.texcoord_state_tokens));

         NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
         finalize = true;
      }

      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv)) {
         /* Lower samplerExternalOES YUV formats to per-plane fetches. */
         nir_lower_tex_options options = {0};
         options.lower_y_uv_external = key->external.lower_nv12;
         options.lower_y_u_v_external = key->external.lower_iyuv;
         options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
         options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
         options.lower_ayuv_external = key->external.lower_ayuv;
         options.lower_xyuv_external = key->external.lower_xyuv;
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
                         false);
      }

      /* This pass needs to happen *after* nir_lower_sampler */
      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
         NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
                    ~stfp->Base.SamplersUsed,
                    key->external.lower_nv12 || key->external.lower_xy_uxvx ||
                       key->external.lower_yx_xuxv,
                    key->external.lower_iyuv);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));

         struct pipe_screen *screen = pipe->screen;
         if (screen->finalize_nir)
            screen->finalize_nir(screen, state.ir.nir, false);
      }

      if (ST_DEBUG & DEBUG_PRINT_IR)
         nir_print_shader(state.ir.nir, stderr);

      variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
      variant->key = *key;

      return variant;
   }

   /* --- TGSI path: rewrite the token stream per key --- */
   state.tokens = stfp->state.tokens;

   assert(!(key->bitmap && key->drawpixels));

   /* Fix texture targets and add fog for ATI_fs */
   if (stfp->ati_fs) {
      const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);

      if (tokens)
         state.tokens = tokens;
      else
         fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
   }

   /* Emulate features. */
   if (key->clamp_color || key->persample_shading) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         /* Free the previous intermediate stream, but never the
          * program's own master copy.
          */
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
   }

   /* glBitmap */
   if (key->bitmap) {
      const struct tgsi_token *tokens;

      variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      tokens = st_get_bitmap_shader(state.tokens,
                                    st->internal_target,
                                    variant->bitmap_sampler,
                                    st->needs_texcoord_semantic,
                                    st->bitmap.tex_format ==
                                    PIPE_FORMAT_R8_UNORM);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
   }

   /* glDrawPixels (color only) */
   if (key->drawpixels) {
      const struct tgsi_token *tokens;
      unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;

      /* Find the first unused slot. */
      variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      if (key->pixelMaps) {
         unsigned samplers_used = stfp->Base.SamplersUsed |
                                  (1 << variant->drawpix_sampler);

         variant->pixelmap_sampler = ffs(~samplers_used) - 1;
      }

      if (key->scaleAndBias) {
         scale_const = _mesa_add_state_reference(params, scale_state);
         bias_const = _mesa_add_state_reference(params, bias_state);
      }

      texcoord_const = _mesa_add_state_reference(params, texcoord_state);

      tokens = st_get_drawpix_shader(state.tokens,
                                     st->needs_texcoord_semantic,
                                     key->scaleAndBias, scale_const,
                                     bias_const, key->pixelMaps,
                                     variant->drawpix_sampler,
                                     variant->pixelmap_sampler,
                                     texcoord_const, st->internal_target);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
   }

   if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
      const struct tgsi_token *tokens;

      /* samplers inserted would conflict, but this should be unpossible: */
      assert(!(key->bitmap || key->drawpixels));

      tokens = st_tgsi_lower_yuv(state.tokens,
                                 ~stfp->Base.SamplersUsed,
                                 key->external.lower_nv12 ||
                                    key->external.lower_xy_uxvx ||
                                    key->external.lower_yx_xuxv,
                                 key->external.lower_iyuv);
      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);

      /* NOTE(review): unlike the other fixups above, the result here is
       * not NULL-checked before replacing state.tokens — confirm
       * st_tgsi_lower_depth_clamp_fs cannot fail.
       */
      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
      if (state.tokens != stfp->state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   /* fill in variant */
   variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
   variant->key = *key;

   /* The driver made its own copy; drop our rewritten stream. */
   if (state.tokens != stfp->state.tokens)
      tgsi_free_tokens(state.tokens);
   return variant;
}
1407
1408 /**
1409 * Translate fragment program if needed.
1410 */
1411 struct st_fp_variant *
1412 st_get_fp_variant(struct st_context *st,
1413 struct st_program *stfp,
1414 const struct st_fp_variant_key *key)
1415 {
1416 struct st_fp_variant *fpv;
1417
1418 /* Search for existing variant */
1419 for (fpv = st_fp_variant(stfp->variants); fpv;
1420 fpv = st_fp_variant(fpv->base.next)) {
1421 if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1422 break;
1423 }
1424 }
1425
1426 if (!fpv) {
1427 /* create new */
1428 fpv = st_create_fp_variant(st, stfp, key);
1429 if (fpv) {
1430 fpv->base.st = key->st;
1431
1432 if (key->bitmap || key->drawpixels) {
1433 /* Regular variants should always come before the
1434 * bitmap & drawpixels variants, (unless there
1435 * are no regular variants) so that
1436 * st_update_fp can take a fast path when
1437 * shader_has_one_variant is set.
1438 */
1439 if (!stfp->variants) {
1440 stfp->variants = &fpv->base;
1441 } else {
1442 /* insert into list after the first one */
1443 fpv->base.next = stfp->variants->next;
1444 stfp->variants->next = &fpv->base;
1445 }
1446 } else {
1447 /* insert into list */
1448 fpv->base.next = stfp->variants;
1449 stfp->variants = &fpv->base;
1450 }
1451 }
1452 }
1453
1454 return fpv;
1455 }
1456
1457 /**
1458 * Translate a program. This is common code for geometry and tessellation
1459 * shaders.
1460 */
/**
 * Translate a program. This is common code for geometry and tessellation
 * shaders.
 *
 * Converts the Mesa IR stored in stp->glsl_to_tgsi into a TGSI token
 * stream: sets stage-specific TGSI properties, builds the input/output
 * slot mappings and semantics (including per-patch varyings), runs
 * st_translate_program, and stores the resulting tokens in stp->state.
 * The glsl_to_tgsi visitor is freed and cleared on completion.
 *
 * \return true on success (always, once ureg creation succeeded).
 */
bool
st_translate_common_program(struct st_context *st,
                            struct st_program *stp)
{
   struct gl_program *prog = &stp->Base;
   enum pipe_shader_type stage =
      pipe_shader_type_from_mesa(stp->Base.info.stage);
   struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);

   if (ureg == NULL)
      return false;

   /* Emit the TGSI properties each stage requires. */
   switch (stage) {
   case PIPE_SHADER_TESS_CTRL:
      ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
                    stp->Base.info.tess.tcs_vertices_out);
      break;

   case PIPE_SHADER_TESS_EVAL:
      /* TGSI has no isolines primitive; map it to lines. */
      if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
         ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
      else
         ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
                       stp->Base.info.tess.primitive_mode);

      /* The (x + 1) % 3 remap below relies on these equalities. */
      STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
      STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
                    PIPE_TESS_SPACING_FRACTIONAL_ODD);
      STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
                    PIPE_TESS_SPACING_FRACTIONAL_EVEN);

      ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
                    (stp->Base.info.tess.spacing + 1) % 3);

      ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
                    !stp->Base.info.tess.ccw);
      ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
                    stp->Base.info.tess.point_mode);
      break;

   case PIPE_SHADER_GEOMETRY:
      ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
                    stp->Base.info.gs.input_primitive);
      ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
                    stp->Base.info.gs.output_primitive);
      ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
                    stp->Base.info.gs.vertices_out);
      ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
                    stp->Base.info.gs.invocations);
      break;

   default:
      break;
   }

   ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
   ubyte inputMapping[VARYING_SLOT_TESS_MAX];
   ubyte outputMapping[VARYING_SLOT_TESS_MAX];
   GLuint attr;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint num_inputs = 0;

   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint num_outputs = 0;

   GLint i;

   memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
   memset(inputMapping, 0, sizeof(inputMapping));
   memset(outputMapping, 0, sizeof(outputMapping));
   memset(&stp->state, 0, sizeof(stp->state));

   if (prog->info.clip_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
                    prog->info.clip_distance_array_size);
   if (prog->info.cull_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
                    prog->info.cull_distance_array_size);

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
         continue;

      unsigned slot = num_inputs++;

      inputMapping[attr] = slot;
      inputSlotToAttr[slot] = attr;

      unsigned semantic_name, semantic_index;
      tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                   &semantic_name, &semantic_index);
      input_semantic_name[slot] = semantic_name;
      input_semantic_index[slot] = semantic_index;
   }

   /* Also add patch inputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_inputs_read & (1u << attr)) {
         GLuint slot = num_inputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         inputMapping[patch_attr] = slot;
         inputSlotToAttr[slot] = patch_attr;
         input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         input_semantic_index[slot] = attr;
      }
   }

   /* initialize output semantics to defaults */
   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
      output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
      output_semantic_index[i] = 0;
   }

   /*
    * Determine number of outputs, the (default) output register
    * mapping and the semantic information for each output.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
         GLuint slot = num_outputs++;

         outputMapping[attr] = slot;

         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }

   /* Also add patch outputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_outputs_written & (1u << attr)) {
         GLuint slot = num_outputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         outputMapping[patch_attr] = slot;
         output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         output_semantic_index[slot] = attr;
      }
   }

   st_translate_program(st->ctx,
                        stage,
                        ureg,
                        stp->glsl_to_tgsi,
                        prog,
                        /* inputs */
                        num_inputs,
                        inputMapping,
                        inputSlotToAttr,
                        input_semantic_name,
                        input_semantic_index,
                        NULL,
                        /* outputs */
                        num_outputs,
                        outputMapping,
                        output_semantic_name,
                        output_semantic_index);

   stp->state.tokens = ureg_get_tokens(ureg, NULL);

   ureg_destroy(ureg);

   st_translate_stream_output_info(prog);

   st_store_ir_in_disk_cache(st, prog, false);

   if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
      _mesa_print_program(prog);

   /* The visitor is no longer needed once tokens exist. */
   free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   stp->glsl_to_tgsi = NULL;
   return true;
}
1644
1645
1646 /**
1647 * Get/create a basic program variant.
1648 */
/**
 * Get/create a basic program variant.
 *
 * Used for tessellation, geometry and compute programs: searches the
 * program's variant list for one matching the key, otherwise creates a
 * driver shader from either the NIR (cloned and optionally lowered) or
 * the TGSI tokens, and links the new variant into the list.
 *
 * \return the variant, or NULL on allocation failure / unhandled stage.
 */
struct st_variant *
st_get_common_variant(struct st_context *st,
                      struct st_program *prog,
                      const struct st_common_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_variant *v;
   struct pipe_shader_state state = {0};

   /* Search for existing variant */
   for (v = prog->variants; v; v = v->next) {
      if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
         break;
   }

   if (!v) {
      /* create new */
      v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
      if (v) {
         if (prog->state.type == PIPE_SHADER_IR_NIR) {
            /* Tracks whether any lowering dirtied the cloned shader. */
            bool finalize = false;

            state.type = PIPE_SHADER_IR_NIR;
            /* Clone so per-variant lowering doesn't touch the master. */
            state.ir.nir = nir_shader_clone(NULL, prog->Base.nir);

            if (key->clamp_color) {
               NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
               finalize = true;
            }

            state.stream_output = prog->state.stream_output;

            if (finalize || !st->allow_st_finalize_nir_twice) {
               st_finalize_nir(st, &prog->Base, prog->shader_program,
                               state.ir.nir, true);
            }

            if (ST_DEBUG & DEBUG_PRINT_IR)
               nir_print_shader(state.ir.nir, stderr);
         } else {
            /* TGSI path: depth-clamp lowering rewrites the program's
             * own token stream in place (unlike the per-variant
             * rewrites in st_create_fp_variant).
             */
            if (key->lower_depth_clamp) {
               struct gl_program_parameter_list *params = prog->Base.Parameters;

               unsigned depth_range_const =
                     _mesa_add_state_reference(params, depth_range_state);

               const struct tgsi_token *tokens;
               tokens =
                     st_tgsi_lower_depth_clamp(prog->state.tokens,
                                               depth_range_const,
                                               key->clip_negative_one_to_one);

               if (tokens != prog->state.tokens)
                  tgsi_free_tokens(prog->state.tokens);

               prog->state.tokens = tokens;
            }
            state = prog->state;

            if (ST_DEBUG & DEBUG_PRINT_IR)
               tgsi_dump(state.tokens, 0);
         }
         /* fill in new variant */
         switch (prog->Base.info.stage) {
         case MESA_SHADER_TESS_CTRL:
            v->driver_shader = pipe->create_tcs_state(pipe, &state);
            break;
         case MESA_SHADER_TESS_EVAL:
            v->driver_shader = pipe->create_tes_state(pipe, &state);
            break;
         case MESA_SHADER_GEOMETRY:
            v->driver_shader = pipe->create_gs_state(pipe, &state);
            break;
         case MESA_SHADER_COMPUTE: {
            /* Compute uses pipe_compute_state rather than
             * pipe_shader_state.
             */
            struct pipe_compute_state cs = {0};
            cs.ir_type = state.type;
            cs.req_local_mem = prog->Base.info.cs.shared_size;

            if (state.type == PIPE_SHADER_IR_NIR)
               cs.prog = state.ir.nir;
            else
               cs.prog = state.tokens;

            v->driver_shader = pipe->create_compute_state(pipe, &cs);
            break;
         }
         default:
            /* NOTE(review): if this (asserted-unreachable) path were
             * hit with a NIR program, the clone made above would leak.
             */
            assert(!"unhandled shader type");
            free(v);
            return NULL;
         }

         st_common_variant(v)->key = *key;
         v->st = key->st;

         /* insert into list */
         v->next = prog->variants;
         prog->variants = v;
      }
   }

   return v;
}
1752
1753
1754 /**
1755 * Vert/Geom/Frag programs have per-context variants. Free all the
1756 * variants attached to the given program which match the given context.
1757 */
1758 static void
1759 destroy_program_variants(struct st_context *st, struct gl_program *target)
1760 {
1761 if (!target || target == &_mesa_DummyProgram)
1762 return;
1763
1764 struct st_program *p = st_program(target);
1765 struct st_variant *v, **prevPtr = &p->variants;
1766
1767 for (v = p->variants; v; ) {
1768 struct st_variant *next = v->next;
1769 if (v->st == st) {
1770 /* unlink from list */
1771 *prevPtr = next;
1772 /* destroy this variant */
1773 delete_variant(st, v, target->Target);
1774 }
1775 else {
1776 prevPtr = &v->next;
1777 }
1778 v = next;
1779 }
1780 }
1781
1782
1783 /**
1784 * Callback for _mesa_HashWalk. Free all the shader's program variants
1785 * which match the given context.
1786 */
1787 static void
1788 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1789 {
1790 struct st_context *st = (struct st_context *) userData;
1791 struct gl_shader *shader = (struct gl_shader *) data;
1792
1793 switch (shader->Type) {
1794 case GL_SHADER_PROGRAM_MESA:
1795 {
1796 struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1797 GLuint i;
1798
1799 for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1800 if (shProg->_LinkedShaders[i])
1801 destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1802 }
1803 }
1804 break;
1805 case GL_VERTEX_SHADER:
1806 case GL_FRAGMENT_SHADER:
1807 case GL_GEOMETRY_SHADER:
1808 case GL_TESS_CONTROL_SHADER:
1809 case GL_TESS_EVALUATION_SHADER:
1810 case GL_COMPUTE_SHADER:
1811 break;
1812 default:
1813 assert(0);
1814 }
1815 }
1816
1817
1818 /**
1819 * Callback for _mesa_HashWalk. Free all the program variants which match
1820 * the given context.
1821 */
1822 static void
1823 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1824 {
1825 struct st_context *st = (struct st_context *) userData;
1826 struct gl_program *program = (struct gl_program *) data;
1827 destroy_program_variants(st, program);
1828 }
1829
1830
1831 /**
1832 * Walk over all shaders and programs to delete any variants which
1833 * belong to the given context.
1834 * This is called during context tear-down.
1835 */
1836 void
1837 st_destroy_program_variants(struct st_context *st)
1838 {
1839 /* If shaders can be shared with other contexts, the last context will
1840 * call DeleteProgram on all shaders, releasing everything.
1841 */
1842 if (st->has_shareable_shaders)
1843 return;
1844
1845 /* ARB vert/frag program */
1846 _mesa_HashWalk(st->ctx->Shared->Programs,
1847 destroy_program_variants_cb, st);
1848
1849 /* GLSL vert/frag/geom shaders */
1850 _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1851 destroy_shader_program_variants_cb, st);
1852 }
1853
1854
1855 /**
1856 * Compile one shader variant.
1857 */
1858 static void
1859 st_precompile_shader_variant(struct st_context *st,
1860 struct gl_program *prog)
1861 {
1862 switch (prog->Target) {
1863 case GL_VERTEX_PROGRAM_ARB: {
1864 struct st_program *p = (struct st_program *)prog;
1865 struct st_common_variant_key key;
1866
1867 memset(&key, 0, sizeof(key));
1868
1869 key.st = st->has_shareable_shaders ? NULL : st;
1870 st_get_vp_variant(st, p, &key);
1871 break;
1872 }
1873
1874 case GL_FRAGMENT_PROGRAM_ARB: {
1875 struct st_program *p = (struct st_program *)prog;
1876 struct st_fp_variant_key key;
1877
1878 memset(&key, 0, sizeof(key));
1879
1880 key.st = st->has_shareable_shaders ? NULL : st;
1881 st_get_fp_variant(st, p, &key);
1882 break;
1883 }
1884
1885 case GL_TESS_CONTROL_PROGRAM_NV:
1886 case GL_TESS_EVALUATION_PROGRAM_NV:
1887 case GL_GEOMETRY_PROGRAM_NV:
1888 case GL_COMPUTE_PROGRAM_NV: {
1889 struct st_program *p = st_program(prog);
1890 struct st_common_variant_key key;
1891
1892 memset(&key, 0, sizeof(key));
1893
1894 key.st = st->has_shareable_shaders ? NULL : st;
1895 st_get_common_variant(st, p, &key);
1896 break;
1897 }
1898
1899 default:
1900 assert(0);
1901 }
1902 }
1903
1904 void
1905 st_finalize_program(struct st_context *st, struct gl_program *prog)
1906 {
1907 if (st->current_program[prog->info.stage] == prog) {
1908 if (prog->info.stage == MESA_SHADER_VERTEX)
1909 st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
1910 else
1911 st->dirty |= ((struct st_program *)prog)->affected_states;
1912 }
1913
1914 if (prog->nir)
1915 nir_sweep(prog->nir);
1916
1917 /* Create Gallium shaders now instead of on demand. */
1918 if (ST_DEBUG & DEBUG_PRECOMPILE ||
1919 st->shader_has_one_variant[prog->info.stage])
1920 st_precompile_shader_variant(st, prog);
1921 }