1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 * Brian Paul
31 */
32
33
34 #include "main/errors.h"
35
36 #include "main/hash.h"
37 #include "main/mtypes.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_print.h"
40 #include "program/prog_to_nir.h"
41 #include "program/programopt.h"
42
43 #include "compiler/nir/nir.h"
44 #include "compiler/nir/nir_serialize.h"
45 #include "draw/draw_context.h"
46
47 #include "pipe/p_context.h"
48 #include "pipe/p_defines.h"
49 #include "pipe/p_shader_tokens.h"
51 #include "tgsi/tgsi_dump.h"
52 #include "tgsi/tgsi_emulate.h"
53 #include "tgsi/tgsi_parse.h"
54 #include "tgsi/tgsi_ureg.h"
55
56 #include "util/u_memory.h"
57
58 #include "st_debug.h"
59 #include "st_cb_bitmap.h"
60 #include "st_cb_drawpixels.h"
61 #include "st_context.h"
62 #include "st_tgsi_lower_depth_clamp.h"
63 #include "st_tgsi_lower_yuv.h"
64 #include "st_program.h"
65 #include "st_mesa_to_tgsi.h"
66 #include "st_atifs_to_tgsi.h"
67 #include "st_nir.h"
68 #include "st_shader_cache.h"
69 #include "st_util.h"
70 #include "cso_cache/cso_context.h"
71
72
73
74 static void
75 set_affected_state_flags(uint64_t *states,
76 struct gl_program *prog,
77 uint64_t new_constants,
78 uint64_t new_sampler_views,
79 uint64_t new_samplers,
80 uint64_t new_images,
81 uint64_t new_ubos,
82 uint64_t new_ssbos,
83 uint64_t new_atomics)
84 {
85 if (prog->Parameters->NumParameters)
86 *states |= new_constants;
87
88 if (prog->info.num_textures)
89 *states |= new_sampler_views | new_samplers;
90
91 if (prog->info.num_images)
92 *states |= new_images;
93
94 if (prog->info.num_ubos)
95 *states |= new_ubos;
96
97 if (prog->info.num_ssbos)
98 *states |= new_ssbos;
99
100 if (prog->info.num_abos)
101 *states |= new_atomics;
102 }
103
104 /**
105 * This determines which states will be updated when the shader is bound.
106 */
107 void
108 st_set_prog_affected_state_flags(struct gl_program *prog)
109 {
110 uint64_t *states;
111
112 switch (prog->info.stage) {
113 case MESA_SHADER_VERTEX:
114 states = &((struct st_program*)prog)->affected_states;
115
116 *states = ST_NEW_VS_STATE |
117 ST_NEW_RASTERIZER |
118 ST_NEW_VERTEX_ARRAYS;
119
120 set_affected_state_flags(states, prog,
121 ST_NEW_VS_CONSTANTS,
122 ST_NEW_VS_SAMPLER_VIEWS,
123 ST_NEW_VS_SAMPLERS,
124 ST_NEW_VS_IMAGES,
125 ST_NEW_VS_UBOS,
126 ST_NEW_VS_SSBOS,
127 ST_NEW_VS_ATOMICS);
128 break;
129
130 case MESA_SHADER_TESS_CTRL:
131 states = &(st_program(prog))->affected_states;
132
133 *states = ST_NEW_TCS_STATE;
134
135 set_affected_state_flags(states, prog,
136 ST_NEW_TCS_CONSTANTS,
137 ST_NEW_TCS_SAMPLER_VIEWS,
138 ST_NEW_TCS_SAMPLERS,
139 ST_NEW_TCS_IMAGES,
140 ST_NEW_TCS_UBOS,
141 ST_NEW_TCS_SSBOS,
142 ST_NEW_TCS_ATOMICS);
143 break;
144
145 case MESA_SHADER_TESS_EVAL:
146 states = &(st_program(prog))->affected_states;
147
148 *states = ST_NEW_TES_STATE |
149 ST_NEW_RASTERIZER;
150
151 set_affected_state_flags(states, prog,
152 ST_NEW_TES_CONSTANTS,
153 ST_NEW_TES_SAMPLER_VIEWS,
154 ST_NEW_TES_SAMPLERS,
155 ST_NEW_TES_IMAGES,
156 ST_NEW_TES_UBOS,
157 ST_NEW_TES_SSBOS,
158 ST_NEW_TES_ATOMICS);
159 break;
160
161 case MESA_SHADER_GEOMETRY:
162 states = &(st_program(prog))->affected_states;
163
164 *states = ST_NEW_GS_STATE |
165 ST_NEW_RASTERIZER;
166
167 set_affected_state_flags(states, prog,
168 ST_NEW_GS_CONSTANTS,
169 ST_NEW_GS_SAMPLER_VIEWS,
170 ST_NEW_GS_SAMPLERS,
171 ST_NEW_GS_IMAGES,
172 ST_NEW_GS_UBOS,
173 ST_NEW_GS_SSBOS,
174 ST_NEW_GS_ATOMICS);
175 break;
176
177 case MESA_SHADER_FRAGMENT:
178 states = &((struct st_program*)prog)->affected_states;
179
180 /* gl_FragCoord and glDrawPixels always use constants. */
181 *states = ST_NEW_FS_STATE |
182 ST_NEW_SAMPLE_SHADING |
183 ST_NEW_FS_CONSTANTS;
184
185 set_affected_state_flags(states, prog,
186 ST_NEW_FS_CONSTANTS,
187 ST_NEW_FS_SAMPLER_VIEWS,
188 ST_NEW_FS_SAMPLERS,
189 ST_NEW_FS_IMAGES,
190 ST_NEW_FS_UBOS,
191 ST_NEW_FS_SSBOS,
192 ST_NEW_FS_ATOMICS);
193 break;
194
195 case MESA_SHADER_COMPUTE:
196 states = &((struct st_program*)prog)->affected_states;
197
198 *states = ST_NEW_CS_STATE;
199
200 set_affected_state_flags(states, prog,
201 ST_NEW_CS_CONSTANTS,
202 ST_NEW_CS_SAMPLER_VIEWS,
203 ST_NEW_CS_SAMPLERS,
204 ST_NEW_CS_IMAGES,
205 ST_NEW_CS_UBOS,
206 ST_NEW_CS_SSBOS,
207 ST_NEW_CS_ATOMICS);
208 break;
209
210 default:
211 unreachable("unhandled shader stage");
212 }
213 }
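
/*
 * For example, st_finalize_program() at the end of this file consumes the
 * mask computed here whenever the program is current:
 *
 *    st->dirty |= ((struct st_program *)prog)->affected_states;
 *
 * so only the state groups the shader actually uses get flagged dirty.
 */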
214
215
216 /**
217 * Delete a shader variant. Note the caller must unlink the variant from
218 * the linked list.
219 */
220 static void
221 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
222 {
223 if (v->driver_shader) {
224 if (target == GL_VERTEX_PROGRAM_ARB &&
225 ((struct st_common_variant*)v)->key.is_draw_shader) {
226 /* Draw shader. */
227 draw_delete_vertex_shader(st->draw, v->driver_shader);
228 } else if (st->has_shareable_shaders || v->st == st) {
229 /* The shader's context matches the calling context, or we
230 * don't care.
231 */
232 switch (target) {
233 case GL_VERTEX_PROGRAM_ARB:
234 st->pipe->delete_vs_state(st->pipe, v->driver_shader);
235 break;
236 case GL_TESS_CONTROL_PROGRAM_NV:
237 st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
238 break;
239 case GL_TESS_EVALUATION_PROGRAM_NV:
240 st->pipe->delete_tes_state(st->pipe, v->driver_shader);
241 break;
242 case GL_GEOMETRY_PROGRAM_NV:
243 st->pipe->delete_gs_state(st->pipe, v->driver_shader);
244 break;
245 case GL_FRAGMENT_PROGRAM_ARB:
246 st->pipe->delete_fs_state(st->pipe, v->driver_shader);
247 break;
248 case GL_COMPUTE_PROGRAM_NV:
249 st->pipe->delete_compute_state(st->pipe, v->driver_shader);
250 break;
251 default:
252          unreachable("bad shader type in delete_variant");
253 }
254 } else {
255 /* We can't delete a shader with a context different from the one
256 * that created it. Add it to the creating context's zombie list.
257 */
258 enum pipe_shader_type type =
259 pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
260
261 st_save_zombie_shader(v->st, type, v->driver_shader);
262 }
263 }
264
265 free(v);
266 }
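
/*
 * Callers are expected to walk and unlink exactly as st_release_variants()
 * does below:
 *
 *    for (v = p->variants; v; ) {
 *       struct st_variant *next = v->next;
 *       delete_variant(st, v, p->Base.Target);
 *       v = next;
 *    }
 */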
267
268 static void
269 st_unbind_program(struct st_context *st, struct st_program *p)
270 {
271 /* Unbind the shader in cso_context and re-bind in st/mesa. */
272 switch (p->Base.info.stage) {
273 case MESA_SHADER_VERTEX:
274 cso_set_vertex_shader_handle(st->cso_context, NULL);
275 st->dirty |= ST_NEW_VS_STATE;
276 break;
277 case MESA_SHADER_TESS_CTRL:
278 cso_set_tessctrl_shader_handle(st->cso_context, NULL);
279 st->dirty |= ST_NEW_TCS_STATE;
280 break;
281 case MESA_SHADER_TESS_EVAL:
282 cso_set_tesseval_shader_handle(st->cso_context, NULL);
283 st->dirty |= ST_NEW_TES_STATE;
284 break;
285 case MESA_SHADER_GEOMETRY:
286 cso_set_geometry_shader_handle(st->cso_context, NULL);
287 st->dirty |= ST_NEW_GS_STATE;
288 break;
289 case MESA_SHADER_FRAGMENT:
290 cso_set_fragment_shader_handle(st->cso_context, NULL);
291 st->dirty |= ST_NEW_FS_STATE;
292 break;
293 case MESA_SHADER_COMPUTE:
294 cso_set_compute_shader_handle(st->cso_context, NULL);
295 st->dirty |= ST_NEW_CS_STATE;
296 break;
297 default:
298 unreachable("invalid shader type");
299 }
300 }
301
302 /**
303 * Free all basic program variants.
304 */
305 void
306 st_release_variants(struct st_context *st, struct st_program *p)
307 {
308 struct st_variant *v;
309
310    /* If we are releasing shaders, unbind them first, because we don't
311     * know which shaders are bound in the driver; st/mesa re-binds the
312     * correct ones afterwards. */
313 if (p->variants)
314 st_unbind_program(st, p);
315
316 for (v = p->variants; v; ) {
317 struct st_variant *next = v->next;
318 delete_variant(st, v, p->Base.Target);
319 v = next;
320 }
321
322 p->variants = NULL;
323
324 if (p->state.tokens) {
325 ureg_free_tokens(p->state.tokens);
326 p->state.tokens = NULL;
327 }
328
329    /* Note: once pipe->create_*_state has been called on a
330     * pipe_shader_state, the driver has taken ownership of its ->ir.nir.
331     * Callers should NULL out the nir field of any pipe_shader_state
332     * passed to such a call in order to indicate that.
333     *
334     * GLSL IR and ARB programs will have set gl_program->nir to the same
335     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
336     */
337 }
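
/*
 * A minimal sketch of the ownership convention described above, following
 * what st_create_vp_variant() does later in this file:
 *
 *    struct pipe_shader_state state = {0};
 *    state.type = PIPE_SHADER_IR_NIR;
 *    state.ir.nir = get_nir_shader(st, stp);
 *    v->driver_shader = pipe->create_vs_state(pipe, &state);
 *    // the driver now owns state.ir.nir; do not free it here
 */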
338
339 /**
340 * Free all basic program variants and unref program.
341 */
342 void
343 st_release_program(struct st_context *st, struct st_program **p)
344 {
345 if (!*p)
346 return;
347
348 st_release_variants(st, *p);
349 st_reference_prog(st, p, NULL);
350 }
351
352 void
353 st_finalize_nir_before_variants(struct nir_shader *nir)
354 {
355 NIR_PASS_V(nir, nir_opt_access);
356
357 NIR_PASS_V(nir, nir_split_var_copies);
358 NIR_PASS_V(nir, nir_lower_var_copies);
359 if (nir->options->lower_all_io_to_temps ||
360 nir->options->lower_all_io_to_elements ||
361 nir->info.stage == MESA_SHADER_VERTEX ||
362 nir->info.stage == MESA_SHADER_GEOMETRY) {
363 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
364 } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
365 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
366 }
367
368 st_nir_assign_vs_in_locations(nir);
369 }
370
371 /**
372 * Translate ARB (asm) program to NIR
373 */
374 static nir_shader *
375 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
376 gl_shader_stage stage)
377 {
378 struct pipe_screen *screen = st->pipe->screen;
379 const struct gl_shader_compiler_options *options =
380 &st->ctx->Const.ShaderCompilerOptions[stage];
381
382 /* Translate to NIR */
383 nir_shader *nir = prog_to_nir(prog, options->NirOptions);
384 NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
385 nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
386
387 NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
388 NIR_PASS_V(nir, nir_lower_system_values);
389
390 /* Optimise NIR */
391 NIR_PASS_V(nir, nir_opt_constant_folding);
392 st_nir_opts(nir);
393 st_finalize_nir_before_variants(nir);
394
395 if (st->allow_st_finalize_nir_twice)
396 st_finalize_nir(st, prog, NULL, nir, true);
397
398 nir_validate_shader(nir, "after st/glsl finalize_nir");
399
400 return nir;
401 }
402
403 void
404 st_prepare_vertex_program(struct st_program *stp)
405 {
406 struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
407
408 stvp->num_inputs = 0;
409 memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
410 memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
411
412 /* Determine number of inputs, the mappings between VERT_ATTRIB_x
413 * and TGSI generic input indexes, plus input attrib semantic info.
414 */
415 for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
416 if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
417 stvp->input_to_index[attr] = stvp->num_inputs;
418 stvp->index_to_input[stvp->num_inputs] = attr;
419 stvp->num_inputs++;
420
421 if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
422 /* add placeholder for second part of a double attribute */
423 stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
424 stvp->num_inputs++;
425 }
426 }
427 }
428 /* pre-setup potentially unused edgeflag input */
429 stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
430 stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
431
432 /* Compute mapping of vertex program outputs to slots. */
433 unsigned num_outputs = 0;
434 for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
435 if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
436 stvp->result_to_output[attr] = num_outputs++;
437 }
438 /* pre-setup potentially unused edgeflag output */
439 stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
440 }
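
/*
 * Worked example with a hypothetical input set: a vertex program reading
 * VERT_ATTRIB_POS plus a dual-slot VERT_ATTRIB_GENERIC0 ends up with
 *
 *    input_to_index[VERT_ATTRIB_POS]      = 0
 *    input_to_index[VERT_ATTRIB_GENERIC0] = 1
 *    index_to_input[2] = ST_DOUBLE_ATTRIB_PLACEHOLDER  (2nd half of double)
 *    num_inputs = 3
 *
 * with the edgeflag input then pre-assigned to index 3 after the loop.
 */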
441
442 void
443 st_translate_stream_output_info(struct gl_program *prog)
444 {
445 struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
446 if (!info)
447 return;
448
449 /* Determine the (default) output register mapping for each output. */
450 unsigned num_outputs = 0;
451 ubyte output_mapping[VARYING_SLOT_TESS_MAX];
452 memset(output_mapping, 0, sizeof(output_mapping));
453
454 for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
455 if (prog->info.outputs_written & BITFIELD64_BIT(attr))
456 output_mapping[attr] = num_outputs++;
457 }
458
459 /* Translate stream output info. */
460 struct pipe_stream_output_info *so_info =
461 &((struct st_program*)prog)->state.stream_output;
462
463 for (unsigned i = 0; i < info->NumOutputs; i++) {
464 so_info->output[i].register_index =
465 output_mapping[info->Outputs[i].OutputRegister];
466 so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
467 so_info->output[i].num_components = info->Outputs[i].NumComponents;
468 so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
469 so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
470 so_info->output[i].stream = info->Outputs[i].StreamId;
471 }
472
473 for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
474 so_info->stride[i] = info->Buffers[i].Stride;
475 }
476 so_info->num_outputs = info->NumOutputs;
477 }
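
/*
 * Worked example with a hypothetical program: if outputs_written contains
 * VARYING_SLOT_POS and VARYING_SLOT_COL0 and transform feedback captures
 * COL0.xyz into buffer 0, the loop above yields output_mapping[POS] = 0 and
 * output_mapping[COL0] = 1, so the translated entry becomes:
 *
 *    so_info->output[0].register_index  = 1;   // COL0's output slot
 *    so_info->output[0].start_component = 0;
 *    so_info->output[0].num_components  = 3;
 *    so_info->output[0].output_buffer   = 0;
 */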
478
479 /**
480 * Translate a vertex program.
481 */
482 bool
483 st_translate_vertex_program(struct st_context *st,
484 struct st_program *stp)
485 {
486 struct ureg_program *ureg;
487 enum pipe_error error;
488 unsigned num_outputs = 0;
489 unsigned attr;
490 ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
491 ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
492
493 if (stp->Base.arb.IsPositionInvariant)
494 _mesa_insert_mvp_code(st->ctx, &stp->Base);
495
496 /* ARB_vp: */
497 if (!stp->glsl_to_tgsi) {
498 _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);
499
500 /* This determines which states will be updated when the assembly
501 * shader is bound.
502 */
503 stp->affected_states = ST_NEW_VS_STATE |
504 ST_NEW_RASTERIZER |
505 ST_NEW_VERTEX_ARRAYS;
506
507 if (stp->Base.Parameters->NumParameters)
508 stp->affected_states |= ST_NEW_VS_CONSTANTS;
509
510 /* Translate to NIR if preferred. */
511 if (st->pipe->screen->get_shader_param(st->pipe->screen,
512 PIPE_SHADER_VERTEX,
513 PIPE_SHADER_CAP_PREFERRED_IR)) {
514 assert(!stp->glsl_to_tgsi);
515
516 if (stp->Base.nir)
517 ralloc_free(stp->Base.nir);
518
519 if (stp->serialized_nir) {
520 free(stp->serialized_nir);
521 stp->serialized_nir = NULL;
522 }
523
524 stp->state.type = PIPE_SHADER_IR_NIR;
525 stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
526 MESA_SHADER_VERTEX);
527
528 /* We must update stp->Base.info after translation and before
529 * st_prepare_vertex_program is called, because inputs_read
530 * may become outdated after NIR optimization passes.
531 *
532           * For ffvp/ARB_vp, inputs_read is populated from the declared
533           * attributes without considering whether they are actually
534           * used. When creating shader variants we expect their
535           * inputs_read to match the base program's for input mapping
536           * to work properly.
537 */
538 nir_shader_gather_info(stp->Base.nir,
539 nir_shader_get_entrypoint(stp->Base.nir));
540 st_nir_assign_vs_in_locations(stp->Base.nir);
541 stp->Base.info = stp->Base.nir->info;
542
543 /* For st_draw_feedback, we need to generate TGSI too if draw doesn't
544 * use LLVM.
545 */
546 if (draw_has_llvm()) {
547 st_prepare_vertex_program(stp);
548 return true;
549 }
550 }
551 }
552
553 st_prepare_vertex_program(stp);
554
555 /* Get semantic names and indices. */
556 for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
557 if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
558 unsigned slot = num_outputs++;
559 unsigned semantic_name, semantic_index;
560 tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
561 &semantic_name, &semantic_index);
562 output_semantic_name[slot] = semantic_name;
563 output_semantic_index[slot] = semantic_index;
564 }
565 }
566 /* pre-setup potentially unused edgeflag output */
567 output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
568 output_semantic_index[num_outputs] = 0;
569
570 ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
571 if (ureg == NULL)
572 return false;
573
574 if (stp->Base.info.clip_distance_array_size)
575 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
576 stp->Base.info.clip_distance_array_size);
577 if (stp->Base.info.cull_distance_array_size)
578 ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
579 stp->Base.info.cull_distance_array_size);
580
581 if (ST_DEBUG & DEBUG_MESA) {
582 _mesa_print_program(&stp->Base);
583 _mesa_print_program_parameters(st->ctx, &stp->Base);
584 debug_printf("\n");
585 }
586
587 struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
588
589 if (stp->glsl_to_tgsi) {
590 error = st_translate_program(st->ctx,
591 PIPE_SHADER_VERTEX,
592 ureg,
593 stp->glsl_to_tgsi,
594 &stp->Base,
595 /* inputs */
596 stvp->num_inputs,
597 stvp->input_to_index,
598 NULL, /* inputSlotToAttr */
599 NULL, /* input semantic name */
600 NULL, /* input semantic index */
601 NULL, /* interp mode */
602 /* outputs */
603 num_outputs,
604 stvp->result_to_output,
605 output_semantic_name,
606 output_semantic_index);
607
608 st_translate_stream_output_info(&stp->Base);
609
610 free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
611 } else
612 error = st_translate_mesa_program(st->ctx,
613 PIPE_SHADER_VERTEX,
614 ureg,
615 &stp->Base,
616 /* inputs */
617 stvp->num_inputs,
618 stvp->input_to_index,
619 NULL, /* input semantic name */
620 NULL, /* input semantic index */
621 NULL,
622 /* outputs */
623 num_outputs,
624 stvp->result_to_output,
625 output_semantic_name,
626 output_semantic_index);
627
628 if (error) {
629 debug_printf("%s: failed to translate Mesa program:\n", __func__);
630 _mesa_print_program(&stp->Base);
631 debug_assert(0);
632 return false;
633 }
634
635 stp->state.tokens = ureg_get_tokens(ureg, NULL);
636 ureg_destroy(ureg);
637
638 if (stp->glsl_to_tgsi) {
639 stp->glsl_to_tgsi = NULL;
640 st_store_ir_in_disk_cache(st, &stp->Base, false);
641 }
642
643 return stp->state.tokens != NULL;
644 }
645
646 static struct nir_shader *
647 get_nir_shader(struct st_context *st, struct st_program *stp)
648 {
649 if (stp->Base.nir) {
650 nir_shader *nir = stp->Base.nir;
651
652 /* The first shader variant takes ownership of NIR, so that there is
653 * no cloning. Additional shader variants are always generated from
654 * serialized NIR to save memory.
655 */
656 stp->Base.nir = NULL;
657 assert(stp->serialized_nir && stp->serialized_nir_size);
658 return nir;
659 }
660
661 struct blob_reader blob_reader;
662 const struct nir_shader_compiler_options *options =
663 st->ctx->Const.ShaderCompilerOptions[stp->Base.info.stage].NirOptions;
664
665 blob_reader_init(&blob_reader, stp->serialized_nir, stp->serialized_nir_size);
666 return nir_deserialize(NULL, options, &blob_reader);
667 }
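
/*
 * Usage sketch, as in the variant-creation paths below: the first variant
 * consumes the original NIR; every later variant deserializes a fresh copy.
 *
 *    state.type = PIPE_SHADER_IR_NIR;
 *    state.ir.nir = get_nir_shader(st, stp);  // owned by the new variant
 *
 * This relies on st_serialize_nir() having stashed the blob beforehand
 * (hence the assertion above).
 */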
668
669 static const gl_state_index16 depth_range_state[STATE_LENGTH] =
670 { STATE_DEPTH_RANGE };
671
672 static struct st_common_variant *
673 st_create_vp_variant(struct st_context *st,
674 struct st_program *stvp,
675 const struct st_common_variant_key *key)
676 {
677 struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
678 struct pipe_context *pipe = st->pipe;
679 struct pipe_screen *screen = pipe->screen;
680 struct pipe_shader_state state = {0};
681
682 static const gl_state_index16 point_size_state[STATE_LENGTH] =
683 { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
684 struct gl_program_parameter_list *params = stvp->Base.Parameters;
685
686 vpv->key = *key;
687
688 state.stream_output = stvp->state.stream_output;
689
690 if (stvp->state.type == PIPE_SHADER_IR_NIR &&
691 (!key->is_draw_shader || draw_has_llvm())) {
692 bool finalize = false;
693
694 state.type = PIPE_SHADER_IR_NIR;
695 state.ir.nir = get_nir_shader(st, stvp);
696 if (key->clamp_color) {
697 NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
698 finalize = true;
699 }
700 if (key->passthrough_edgeflags) {
701 NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
702 finalize = true;
703 }
704
705 if (key->lower_point_size) {
706 _mesa_add_state_reference(params, point_size_state);
707 NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
708 point_size_state);
709 finalize = true;
710 }
711
712 if (key->lower_ucp) {
713 bool can_compact = screen->get_param(screen,
714 PIPE_CAP_NIR_COMPACT_ARRAYS);
715
716 bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
717 gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
718 for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
719 if (use_eye) {
720 clipplane_state[i][0] = STATE_CLIPPLANE;
721 clipplane_state[i][1] = i;
722 } else {
723 clipplane_state[i][0] = STATE_INTERNAL;
724 clipplane_state[i][1] = STATE_CLIP_INTERNAL;
725 clipplane_state[i][2] = i;
726 }
727 _mesa_add_state_reference(params, clipplane_state[i]);
728 }
729
730 NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
731 true, can_compact, clipplane_state);
732 NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
733 nir_shader_get_entrypoint(state.ir.nir), true, false);
734 NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
735 finalize = true;
736 }
737
738 if (finalize || !st->allow_st_finalize_nir_twice) {
739 st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
740 true);
741
742 /* Some of the lowering above may have introduced new varyings */
743 nir_shader_gather_info(state.ir.nir,
744 nir_shader_get_entrypoint(state.ir.nir));
745 }
746
747 if (ST_DEBUG & DEBUG_PRINT_IR)
748 nir_print_shader(state.ir.nir, stderr);
749
750 if (key->is_draw_shader)
751 vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
752 else
753 vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
754
755 return vpv;
756 }
757
758 state.type = PIPE_SHADER_IR_TGSI;
759 state.tokens = tgsi_dup_tokens(stvp->state.tokens);
760
761 /* Emulate features. */
762 if (key->clamp_color || key->passthrough_edgeflags) {
763 const struct tgsi_token *tokens;
764 unsigned flags =
765 (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
766 (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
767
768 tokens = tgsi_emulate(state.tokens, flags);
769
770 if (tokens) {
771 tgsi_free_tokens(state.tokens);
772 state.tokens = tokens;
773 } else {
774 fprintf(stderr, "mesa: cannot emulate deprecated features\n");
775 }
776 }
777
778 if (key->lower_depth_clamp) {
779 unsigned depth_range_const =
780 _mesa_add_state_reference(params, depth_range_state);
781
782 const struct tgsi_token *tokens;
783 tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
784 key->clip_negative_one_to_one);
785 if (tokens != state.tokens)
786 tgsi_free_tokens(state.tokens);
787 state.tokens = tokens;
788 }
789
790 if (ST_DEBUG & DEBUG_PRINT_IR)
791 tgsi_dump(state.tokens, 0);
792
793 if (key->is_draw_shader)
794 vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
795 else
796 vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
797
798 if (state.tokens) {
799 tgsi_free_tokens(state.tokens);
800 }
801
802 return vpv;
803 }
804
805
806 /**
807 * Find/create a vertex program variant.
808 */
809 struct st_common_variant *
810 st_get_vp_variant(struct st_context *st,
811 struct st_program *stp,
812 const struct st_common_variant_key *key)
813 {
814 struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
815 struct st_common_variant *vpv;
816
817 /* Search for existing variant */
818 for (vpv = st_common_variant(stp->variants); vpv;
819 vpv = st_common_variant(vpv->base.next)) {
820 if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
821 break;
822 }
823 }
824
825 if (!vpv) {
826 /* create now */
827 vpv = st_create_vp_variant(st, stp, key);
828 if (vpv) {
829 vpv->base.st = key->st;
830
831 unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
832 for (unsigned index = 0; index < num_inputs; ++index) {
833 unsigned attr = stvp->index_to_input[index];
834 if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
835 continue;
836 vpv->vert_attrib_mask |= 1u << attr;
837 }
838
839 /* insert into list */
840 vpv->base.next = stp->variants;
841 stp->variants = &vpv->base;
842 }
843 }
844
845 return vpv;
846 }
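
/*
 * Minimal lookup sketch, mirroring st_precompile_shader_variant() at the
 * end of this file:
 *
 *    struct st_common_variant_key key;
 *    memset(&key, 0, sizeof(key));
 *    key.st = st->has_shareable_shaders ? NULL : st;
 *    st_get_vp_variant(st, stp, &key);
 *
 * Keys are compared with memcmp(), so zero-initializing the whole struct
 * (padding included) is required for the variant search to hit reliably.
 */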
847
848
849 /**
850 * Translate a Mesa fragment shader into a TGSI shader.
851 */
852 bool
853 st_translate_fragment_program(struct st_context *st,
854 struct st_program *stfp)
855 {
856 /* Non-GLSL programs: */
857 if (!stfp->glsl_to_tgsi) {
858 _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
859 if (st->ctx->Const.GLSLFragCoordIsSysVal)
860 _mesa_program_fragment_position_to_sysval(&stfp->Base);
861
862 /* This determines which states will be updated when the assembly
863 * shader is bound.
864 *
865 * fragment.position and glDrawPixels always use constants.
866 */
867 stfp->affected_states = ST_NEW_FS_STATE |
868 ST_NEW_SAMPLE_SHADING |
869 ST_NEW_FS_CONSTANTS;
870
871 if (stfp->ati_fs) {
872 /* Just set them for ATI_fs unconditionally. */
873 stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
874 ST_NEW_FS_SAMPLERS;
875 } else {
876 /* ARB_fp */
877 if (stfp->Base.SamplersUsed)
878 stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
879 ST_NEW_FS_SAMPLERS;
880 }
881
882 /* Translate to NIR. */
883 if (!stfp->ati_fs &&
884 st->pipe->screen->get_shader_param(st->pipe->screen,
885 PIPE_SHADER_FRAGMENT,
886 PIPE_SHADER_CAP_PREFERRED_IR)) {
887 nir_shader *nir =
888 st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
889
890 if (stfp->Base.nir)
891 ralloc_free(stfp->Base.nir);
892 if (stfp->serialized_nir) {
893 free(stfp->serialized_nir);
894 stfp->serialized_nir = NULL;
895 }
896 stfp->state.type = PIPE_SHADER_IR_NIR;
897 stfp->Base.nir = nir;
898 return true;
899 }
900 }
901
902 ubyte outputMapping[2 * FRAG_RESULT_MAX];
903 ubyte inputMapping[VARYING_SLOT_MAX];
904 ubyte inputSlotToAttr[VARYING_SLOT_MAX];
905 ubyte interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */
906 GLuint attr;
907 GLbitfield64 inputsRead;
908 struct ureg_program *ureg;
909
910 GLboolean write_all = GL_FALSE;
911
912 ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
913 ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
914 uint fs_num_inputs = 0;
915
916 ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
917 ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
918 uint fs_num_outputs = 0;
919
920 memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
921
922 /*
923 * Convert Mesa program inputs to TGSI input register semantics.
924 */
925 inputsRead = stfp->Base.info.inputs_read;
926 for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
927 if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
928 const GLuint slot = fs_num_inputs++;
929
930 inputMapping[attr] = slot;
931 inputSlotToAttr[slot] = attr;
932
933 switch (attr) {
934 case VARYING_SLOT_POS:
935 input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
936 input_semantic_index[slot] = 0;
937 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
938 break;
939 case VARYING_SLOT_COL0:
940 input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
941 input_semantic_index[slot] = 0;
942 interpMode[slot] = stfp->glsl_to_tgsi ?
943 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
944 break;
945 case VARYING_SLOT_COL1:
946 input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
947 input_semantic_index[slot] = 1;
948 interpMode[slot] = stfp->glsl_to_tgsi ?
949 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
950 break;
951 case VARYING_SLOT_FOGC:
952 input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
953 input_semantic_index[slot] = 0;
954 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
955 break;
956 case VARYING_SLOT_FACE:
957 input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
958 input_semantic_index[slot] = 0;
959 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
960 break;
961 case VARYING_SLOT_PRIMITIVE_ID:
962 input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
963 input_semantic_index[slot] = 0;
964 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
965 break;
966 case VARYING_SLOT_LAYER:
967 input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
968 input_semantic_index[slot] = 0;
969 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
970 break;
971 case VARYING_SLOT_VIEWPORT:
972 input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
973 input_semantic_index[slot] = 0;
974 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
975 break;
976 case VARYING_SLOT_CLIP_DIST0:
977 input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
978 input_semantic_index[slot] = 0;
979 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
980 break;
981 case VARYING_SLOT_CLIP_DIST1:
982 input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
983 input_semantic_index[slot] = 1;
984 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
985 break;
986 case VARYING_SLOT_CULL_DIST0:
987 case VARYING_SLOT_CULL_DIST1:
988 /* these should have been lowered by GLSL */
989 assert(0);
990 break;
991 /* In most cases, there is nothing special about these
992 * inputs, so adopt a convention to use the generic
993 * semantic name and the mesa VARYING_SLOT_ number as the
994 * index.
995 *
996 * All that is required is that the vertex shader labels
997 * its own outputs similarly, and that the vertex shader
998 * generates at least every output required by the
999 * fragment shader plus fixed-function hardware (such as
1000 * BFC).
1001 *
1002 * However, some drivers may need us to identify the PNTC and TEXi
1003 * varyings if, for example, their capability to replace them with
1004 * sprite coordinates is limited.
1005 */
1006 case VARYING_SLOT_PNTC:
1007 if (st->needs_texcoord_semantic) {
1008 input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
1009 input_semantic_index[slot] = 0;
1010 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1011 break;
1012 }
1013 /* fall through */
1014 case VARYING_SLOT_TEX0:
1015 case VARYING_SLOT_TEX1:
1016 case VARYING_SLOT_TEX2:
1017 case VARYING_SLOT_TEX3:
1018 case VARYING_SLOT_TEX4:
1019 case VARYING_SLOT_TEX5:
1020 case VARYING_SLOT_TEX6:
1021 case VARYING_SLOT_TEX7:
1022 if (st->needs_texcoord_semantic) {
1023 input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
1024 input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
1025 interpMode[slot] = stfp->glsl_to_tgsi ?
1026 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1027 break;
1028 }
1029 /* fall through */
1030 case VARYING_SLOT_VAR0:
1031 default:
1032 /* Semantic indices should be zero-based because drivers may choose
1033 * to assign a fixed slot determined by that index.
1034 * This is useful because ARB_separate_shader_objects uses location
1035 * qualifiers for linkage, and if the semantic index corresponds to
1036           * these locations, linkage passes in the driver become unnecessary.
1037 *
1038 * If needs_texcoord_semantic is true, no semantic indices will be
1039 * consumed for the TEXi varyings, and we can base the locations of
1040 * the user varyings on VAR0. Otherwise, we use TEX0 as base index.
1041 */
1042 assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
1043 (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
1044 input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
1045 input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
1046 if (attr == VARYING_SLOT_PNTC)
1047 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1048 else {
1049 interpMode[slot] = stfp->glsl_to_tgsi ?
1050 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1051 }
1052 break;
1053 }
1054 }
1055 else {
1056 inputMapping[attr] = -1;
1057 }
1058 }
1059
1060 /*
1061 * Semantics and mapping for outputs
1062 */
1063 GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
1064
1065 /* if z is written, emit that first */
1066 if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
1067 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
1068 fs_output_semantic_index[fs_num_outputs] = 0;
1069 outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
1070 fs_num_outputs++;
1071       outputsWritten &= ~BITFIELD64_BIT(FRAG_RESULT_DEPTH);
1072 }
1073
1074 if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
1075 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
1076 fs_output_semantic_index[fs_num_outputs] = 0;
1077 outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
1078 fs_num_outputs++;
1079       outputsWritten &= ~BITFIELD64_BIT(FRAG_RESULT_STENCIL);
1080 }
1081
1082 if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
1083 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
1084 fs_output_semantic_index[fs_num_outputs] = 0;
1085 outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
1086 fs_num_outputs++;
1087       outputsWritten &= ~BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
1088 }
1089
1090 /* handle remaining outputs (color) */
1091 for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
1092 const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
1093 stfp->Base.SecondaryOutputsWritten;
1094 const unsigned loc = attr % FRAG_RESULT_MAX;
1095
1096 if (written & BITFIELD64_BIT(loc)) {
1097 switch (loc) {
1098 case FRAG_RESULT_DEPTH:
1099 case FRAG_RESULT_STENCIL:
1100 case FRAG_RESULT_SAMPLE_MASK:
1101 /* handled above */
1102 assert(0);
1103 break;
1104 case FRAG_RESULT_COLOR:
1105 write_all = GL_TRUE; /* fallthrough */
1106 default: {
1107 int index;
1108 assert(loc == FRAG_RESULT_COLOR ||
1109 (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1110
1111 index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1112
1113 if (attr >= FRAG_RESULT_MAX) {
1114 /* Secondary color for dual source blending. */
1115 assert(index == 0);
1116 index++;
1117 }
1118
1119 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1120 fs_output_semantic_index[fs_num_outputs] = index;
1121 outputMapping[attr] = fs_num_outputs;
1122 break;
1123 }
1124 }
1125
1126 fs_num_outputs++;
1127 }
1128 }
1129
1130 ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1131 if (ureg == NULL)
1132 return false;
1133
1134 if (ST_DEBUG & DEBUG_MESA) {
1135 _mesa_print_program(&stfp->Base);
1136 _mesa_print_program_parameters(st->ctx, &stfp->Base);
1137 debug_printf("\n");
1138 }
1139 if (write_all == GL_TRUE)
1140 ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1141
1142 if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1143 switch (stfp->Base.info.fs.depth_layout) {
1144 case FRAG_DEPTH_LAYOUT_ANY:
1145 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1146 TGSI_FS_DEPTH_LAYOUT_ANY);
1147 break;
1148 case FRAG_DEPTH_LAYOUT_GREATER:
1149 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1150 TGSI_FS_DEPTH_LAYOUT_GREATER);
1151 break;
1152 case FRAG_DEPTH_LAYOUT_LESS:
1153 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1154 TGSI_FS_DEPTH_LAYOUT_LESS);
1155 break;
1156 case FRAG_DEPTH_LAYOUT_UNCHANGED:
1157 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1158 TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1159 break;
1160 default:
1161 assert(0);
1162 }
1163 }
1164
1165 if (stfp->glsl_to_tgsi) {
1166 st_translate_program(st->ctx,
1167 PIPE_SHADER_FRAGMENT,
1168 ureg,
1169 stfp->glsl_to_tgsi,
1170 &stfp->Base,
1171 /* inputs */
1172 fs_num_inputs,
1173 inputMapping,
1174 inputSlotToAttr,
1175 input_semantic_name,
1176 input_semantic_index,
1177 interpMode,
1178 /* outputs */
1179 fs_num_outputs,
1180 outputMapping,
1181 fs_output_semantic_name,
1182 fs_output_semantic_index);
1183
1184 free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1185 } else if (stfp->ati_fs)
1186 st_translate_atifs_program(ureg,
1187 stfp->ati_fs,
1188 &stfp->Base,
1189 /* inputs */
1190 fs_num_inputs,
1191 inputMapping,
1192 input_semantic_name,
1193 input_semantic_index,
1194 interpMode,
1195 /* outputs */
1196 fs_num_outputs,
1197 outputMapping,
1198 fs_output_semantic_name,
1199 fs_output_semantic_index);
1200 else
1201 st_translate_mesa_program(st->ctx,
1202 PIPE_SHADER_FRAGMENT,
1203 ureg,
1204 &stfp->Base,
1205 /* inputs */
1206 fs_num_inputs,
1207 inputMapping,
1208 input_semantic_name,
1209 input_semantic_index,
1210 interpMode,
1211 /* outputs */
1212 fs_num_outputs,
1213 outputMapping,
1214 fs_output_semantic_name,
1215 fs_output_semantic_index);
1216
1217 stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1218 ureg_destroy(ureg);
1219
1220 if (stfp->glsl_to_tgsi) {
1221 stfp->glsl_to_tgsi = NULL;
1222 st_store_ir_in_disk_cache(st, &stfp->Base, false);
1223 }
1224
1225 return stfp->state.tokens != NULL;
1226 }
1227
1228 static struct st_fp_variant *
1229 st_create_fp_variant(struct st_context *st,
1230 struct st_program *stfp,
1231 const struct st_fp_variant_key *key)
1232 {
1233 struct pipe_context *pipe = st->pipe;
1234 struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1235 struct pipe_shader_state state = {0};
1236 struct gl_program_parameter_list *params = stfp->Base.Parameters;
1237 static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1238 { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1239 static const gl_state_index16 scale_state[STATE_LENGTH] =
1240 { STATE_INTERNAL, STATE_PT_SCALE };
1241 static const gl_state_index16 bias_state[STATE_LENGTH] =
1242 { STATE_INTERNAL, STATE_PT_BIAS };
1243 static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1244 { STATE_INTERNAL, STATE_ALPHA_REF };
1245
1246 if (!variant)
1247 return NULL;
1248
1249 if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1250 bool finalize = false;
1251
1252 state.type = PIPE_SHADER_IR_NIR;
1253 state.ir.nir = get_nir_shader(st, stfp);
1254
1255 if (key->clamp_color) {
1256 NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1257 finalize = true;
1258 }
1259
1260 if (key->lower_flatshade) {
1261 NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1262 finalize = true;
1263 }
1264
1265 if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1266 _mesa_add_state_reference(params, alpha_ref_state);
1267 NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1268 false, alpha_ref_state);
1269 finalize = true;
1270 }
1271
1272 if (key->lower_two_sided_color) {
1273 NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
1274 finalize = true;
1275 }
1276
1277 if (key->persample_shading) {
1278 nir_shader *shader = state.ir.nir;
1279 nir_foreach_variable(var, &shader->inputs)
1280 var->data.sample = true;
1281 finalize = true;
1282 }
1283
1284 assert(!(key->bitmap && key->drawpixels));
1285
1286 /* glBitmap */
1287 if (key->bitmap) {
1288 nir_lower_bitmap_options options = {0};
1289
1290 variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1291 options.sampler = variant->bitmap_sampler;
1292 options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1293
1294 NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1295 finalize = true;
1296 }
1297
1298 /* glDrawPixels (color only) */
1299 if (key->drawpixels) {
1300 nir_lower_drawpixels_options options = {{0}};
1301 unsigned samplers_used = stfp->Base.SamplersUsed;
1302
1303 /* Find the first unused slot. */
1304 variant->drawpix_sampler = ffs(~samplers_used) - 1;
1305 options.drawpix_sampler = variant->drawpix_sampler;
1306 samplers_used |= (1 << variant->drawpix_sampler);
1307
1308 options.pixel_maps = key->pixelMaps;
1309 if (key->pixelMaps) {
1310 variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1311 options.pixelmap_sampler = variant->pixelmap_sampler;
1312 }
1313
1314 options.scale_and_bias = key->scaleAndBias;
1315 if (key->scaleAndBias) {
1316 _mesa_add_state_reference(params, scale_state);
1317 memcpy(options.scale_state_tokens, scale_state,
1318 sizeof(options.scale_state_tokens));
1319 _mesa_add_state_reference(params, bias_state);
1320 memcpy(options.bias_state_tokens, bias_state,
1321 sizeof(options.bias_state_tokens));
1322 }
1323
1324 _mesa_add_state_reference(params, texcoord_state);
1325 memcpy(options.texcoord_state_tokens, texcoord_state,
1326 sizeof(options.texcoord_state_tokens));
1327
1328 NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1329 finalize = true;
1330 }
1331
1332 if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1333 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1334 key->external.lower_ayuv || key->external.lower_xyuv)) {
1335
1336 st_nir_lower_samplers(pipe->screen, state.ir.nir,
1337 stfp->shader_program, &stfp->Base);
1338
1339 nir_lower_tex_options options = {0};
1340 options.lower_y_uv_external = key->external.lower_nv12;
1341 options.lower_y_u_v_external = key->external.lower_iyuv;
1342 options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1343 options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1344 options.lower_ayuv_external = key->external.lower_ayuv;
1345 options.lower_xyuv_external = key->external.lower_xyuv;
1346 NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1347 finalize = true;
1348 }
1349
1350 if (finalize || !st->allow_st_finalize_nir_twice) {
1351 st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
1352 false);
1353 }
1354
1355       /* This pass needs to happen *after* st_nir_lower_samplers */
1356 if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1357 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1358 key->external.lower_ayuv || key->external.lower_xyuv)) {
1359 NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1360 ~stfp->Base.SamplersUsed,
1361 key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1362 key->external.lower_yx_xuxv,
1363 key->external.lower_iyuv);
1364 finalize = true;
1365 }
1366
1367 if (finalize || !st->allow_st_finalize_nir_twice) {
1368 /* Some of the lowering above may have introduced new varyings */
1369 nir_shader_gather_info(state.ir.nir,
1370 nir_shader_get_entrypoint(state.ir.nir));
1371
1372 struct pipe_screen *screen = pipe->screen;
1373 if (screen->finalize_nir)
1374 screen->finalize_nir(screen, state.ir.nir, false);
1375 }
1376
1377 if (ST_DEBUG & DEBUG_PRINT_IR)
1378 nir_print_shader(state.ir.nir, stderr);
1379
1380 variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1381 variant->key = *key;
1382
1383 return variant;
1384 }
1385
1386 state.tokens = stfp->state.tokens;
1387
1388 assert(!(key->bitmap && key->drawpixels));
1389
1390 /* Fix texture targets and add fog for ATI_fs */
1391 if (stfp->ati_fs) {
1392 const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1393
1394 if (tokens)
1395 state.tokens = tokens;
1396 else
1397 fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1398 }
1399
1400 /* Emulate features. */
1401 if (key->clamp_color || key->persample_shading) {
1402 const struct tgsi_token *tokens;
1403 unsigned flags =
1404 (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1405 (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1406
1407 tokens = tgsi_emulate(state.tokens, flags);
1408
1409 if (tokens) {
1410 if (state.tokens != stfp->state.tokens)
1411 tgsi_free_tokens(state.tokens);
1412 state.tokens = tokens;
1413 } else
1414 fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1415 }
1416
1417 /* glBitmap */
1418 if (key->bitmap) {
1419 const struct tgsi_token *tokens;
1420
1421 variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1422
1423 tokens = st_get_bitmap_shader(state.tokens,
1424 st->internal_target,
1425 variant->bitmap_sampler,
1426 st->needs_texcoord_semantic,
1427 st->bitmap.tex_format ==
1428 PIPE_FORMAT_R8_UNORM);
1429
1430 if (tokens) {
1431 if (state.tokens != stfp->state.tokens)
1432 tgsi_free_tokens(state.tokens);
1433 state.tokens = tokens;
1434 } else
1435 fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1436 }
1437
1438 /* glDrawPixels (color only) */
1439 if (key->drawpixels) {
1440 const struct tgsi_token *tokens;
1441 unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1442
1443 /* Find the first unused slot. */
1444 variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1445
1446 if (key->pixelMaps) {
1447 unsigned samplers_used = stfp->Base.SamplersUsed |
1448 (1 << variant->drawpix_sampler);
1449
1450 variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1451 }
1452
1453 if (key->scaleAndBias) {
1454 scale_const = _mesa_add_state_reference(params, scale_state);
1455 bias_const = _mesa_add_state_reference(params, bias_state);
1456 }
1457
1458 texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1459
1460 tokens = st_get_drawpix_shader(state.tokens,
1461 st->needs_texcoord_semantic,
1462 key->scaleAndBias, scale_const,
1463 bias_const, key->pixelMaps,
1464 variant->drawpix_sampler,
1465 variant->pixelmap_sampler,
1466 texcoord_const, st->internal_target);
1467
1468 if (tokens) {
1469 if (state.tokens != stfp->state.tokens)
1470 tgsi_free_tokens(state.tokens);
1471 state.tokens = tokens;
1472 } else
1473 fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1474 }
1475
1476 if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1477 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1478 const struct tgsi_token *tokens;
1479
1480          /* samplers inserted would conflict, but this should be impossible: */
1481 assert(!(key->bitmap || key->drawpixels));
1482
1483 tokens = st_tgsi_lower_yuv(state.tokens,
1484 ~stfp->Base.SamplersUsed,
1485 key->external.lower_nv12 ||
1486 key->external.lower_xy_uxvx ||
1487 key->external.lower_yx_xuxv,
1488 key->external.lower_iyuv);
1489 if (tokens) {
1490 if (state.tokens != stfp->state.tokens)
1491 tgsi_free_tokens(state.tokens);
1492 state.tokens = tokens;
1493 } else {
1494 fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1495 }
1496 }
1497
1498 if (key->lower_depth_clamp) {
1499 unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1500
1501 const struct tgsi_token *tokens;
1502 tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1503 if (state.tokens != stfp->state.tokens)
1504 tgsi_free_tokens(state.tokens);
1505 state.tokens = tokens;
1506 }
1507
1508 if (ST_DEBUG & DEBUG_PRINT_IR)
1509 tgsi_dump(state.tokens, 0);
1510
1511 /* fill in variant */
1512 variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1513 variant->key = *key;
1514
1515 if (state.tokens != stfp->state.tokens)
1516 tgsi_free_tokens(state.tokens);
1517 return variant;
1518 }
1519
1520 /**
1521 * Translate fragment program if needed.
1522 */
1523 struct st_fp_variant *
1524 st_get_fp_variant(struct st_context *st,
1525 struct st_program *stfp,
1526 const struct st_fp_variant_key *key)
1527 {
1528 struct st_fp_variant *fpv;
1529
1530 /* Search for existing variant */
1531 for (fpv = st_fp_variant(stfp->variants); fpv;
1532 fpv = st_fp_variant(fpv->base.next)) {
1533 if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1534 break;
1535 }
1536 }
1537
1538 if (!fpv) {
1539 /* create new */
1540 fpv = st_create_fp_variant(st, stfp, key);
1541 if (fpv) {
1542 fpv->base.st = key->st;
1543
1544 if (key->bitmap || key->drawpixels) {
1545 /* Regular variants should always come before the
1546                 * bitmap & drawpixels variants (unless there
1547                 * are no regular variants), so that
1548 * st_update_fp can take a fast path when
1549 * shader_has_one_variant is set.
1550 */
1551 if (!stfp->variants) {
1552 stfp->variants = &fpv->base;
1553 } else {
1554 /* insert into list after the first one */
1555 fpv->base.next = stfp->variants->next;
1556 stfp->variants->next = &fpv->base;
1557 }
1558 } else {
1559 /* insert into list */
1560 fpv->base.next = stfp->variants;
1561 stfp->variants = &fpv->base;
1562 }
1563 }
1564 }
1565
1566 return fpv;
1567 }
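
/*
 * Resulting list shapes (sketch): with a regular variant R created first
 * and a bitmap variant B added later, the insertion above keeps R at the
 * head:
 *
 *    variants: R
 *    variants: R -> B
 *
 * which is what lets st_update_fp trust the head of the list when
 * shader_has_one_variant is set.
 */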
1568
1569 /**
1570 * Translate a program. This is common code for geometry and tessellation
1571 * shaders.
1572 */
1573 bool
1574 st_translate_common_program(struct st_context *st,
1575 struct st_program *stp)
1576 {
1577 struct gl_program *prog = &stp->Base;
1578 enum pipe_shader_type stage =
1579 pipe_shader_type_from_mesa(stp->Base.info.stage);
1580 struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1581
1582 if (ureg == NULL)
1583 return false;
1584
1585 switch (stage) {
1586 case PIPE_SHADER_TESS_CTRL:
1587 ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1588 stp->Base.info.tess.tcs_vertices_out);
1589 break;
1590
1591 case PIPE_SHADER_TESS_EVAL:
1592 if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
1593 ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1594 else
1595 ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1596 stp->Base.info.tess.primitive_mode);
1597
1598 STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1599 STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1600 PIPE_TESS_SPACING_FRACTIONAL_ODD);
1601 STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1602 PIPE_TESS_SPACING_FRACTIONAL_EVEN);
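      /* Worked mapping, assuming the current enum values in shader_enums.h
       * and p_defines.h: the GL and pipe enums list the same three modes
       * rotated by one, so e.g. TESS_SPACING_EQUAL (1) -> (1 + 1) % 3 = 2
       * == PIPE_TESS_SPACING_EQUAL, which is what the asserts above verify.
       */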
1603
1604 ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1605 (stp->Base.info.tess.spacing + 1) % 3);
1606
1607 ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1608 !stp->Base.info.tess.ccw);
1609 ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1610 stp->Base.info.tess.point_mode);
1611 break;
1612
1613 case PIPE_SHADER_GEOMETRY:
1614 ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1615 stp->Base.info.gs.input_primitive);
1616 ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1617 stp->Base.info.gs.output_primitive);
1618 ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1619 stp->Base.info.gs.vertices_out);
1620 ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1621 stp->Base.info.gs.invocations);
1622 break;
1623
1624 default:
1625 break;
1626 }
1627
1628 ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1629 ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1630 ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1631 GLuint attr;
1632
1633 ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1634 ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1635 uint num_inputs = 0;
1636
1637 ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1638 ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1639 uint num_outputs = 0;
1640
1641 GLint i;
1642
1643 memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1644 memset(inputMapping, 0, sizeof(inputMapping));
1645 memset(outputMapping, 0, sizeof(outputMapping));
1646 memset(&stp->state, 0, sizeof(stp->state));
1647
1648 if (prog->info.clip_distance_array_size)
1649 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1650 prog->info.clip_distance_array_size);
1651 if (prog->info.cull_distance_array_size)
1652 ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1653 prog->info.cull_distance_array_size);
1654
1655 /*
1656 * Convert Mesa program inputs to TGSI input register semantics.
1657 */
1658 for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1659 if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1660 continue;
1661
1662 unsigned slot = num_inputs++;
1663
1664 inputMapping[attr] = slot;
1665 inputSlotToAttr[slot] = attr;
1666
1667 unsigned semantic_name, semantic_index;
1668 tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1669 &semantic_name, &semantic_index);
1670 input_semantic_name[slot] = semantic_name;
1671 input_semantic_index[slot] = semantic_index;
1672 }
1673
1674 /* Also add patch inputs. */
1675 for (attr = 0; attr < 32; attr++) {
1676 if (prog->info.patch_inputs_read & (1u << attr)) {
1677 GLuint slot = num_inputs++;
1678 GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1679
1680 inputMapping[patch_attr] = slot;
1681 inputSlotToAttr[slot] = patch_attr;
1682 input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1683 input_semantic_index[slot] = attr;
1684 }
1685 }
1686
1687 /* initialize output semantics to defaults */
1688 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1689 output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1690 output_semantic_index[i] = 0;
1691 }
1692
1693 /*
1694 * Determine number of outputs, the (default) output register
1695 * mapping and the semantic information for each output.
1696 */
1697 for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1698 if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1699 GLuint slot = num_outputs++;
1700
1701 outputMapping[attr] = slot;
1702
1703 unsigned semantic_name, semantic_index;
1704 tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1705 &semantic_name, &semantic_index);
1706 output_semantic_name[slot] = semantic_name;
1707 output_semantic_index[slot] = semantic_index;
1708 }
1709 }
1710
1711 /* Also add patch outputs. */
1712 for (attr = 0; attr < 32; attr++) {
1713 if (prog->info.patch_outputs_written & (1u << attr)) {
1714 GLuint slot = num_outputs++;
1715 GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1716
1717 outputMapping[patch_attr] = slot;
1718 output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1719 output_semantic_index[slot] = attr;
1720 }
1721 }
1722
1723 st_translate_program(st->ctx,
1724 stage,
1725 ureg,
1726 stp->glsl_to_tgsi,
1727 prog,
1728 /* inputs */
1729 num_inputs,
1730 inputMapping,
1731 inputSlotToAttr,
1732 input_semantic_name,
1733 input_semantic_index,
1734 NULL,
1735 /* outputs */
1736 num_outputs,
1737 outputMapping,
1738 output_semantic_name,
1739 output_semantic_index);
1740
1741 stp->state.tokens = ureg_get_tokens(ureg, NULL);
1742
1743 ureg_destroy(ureg);
1744
1745 st_translate_stream_output_info(prog);
1746
1747 st_store_ir_in_disk_cache(st, prog, false);
1748
1749    if ((ST_DEBUG & DEBUG_PRINT_IR) && (ST_DEBUG & DEBUG_MESA))
1750 _mesa_print_program(prog);
1751
1752 free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1753 stp->glsl_to_tgsi = NULL;
1754 return true;
1755 }
1756
1757
1758 /**
1759 * Get/create a basic program variant.
1760 */
1761 struct st_variant *
1762 st_get_common_variant(struct st_context *st,
1763 struct st_program *prog,
1764 const struct st_common_variant_key *key)
1765 {
1766 struct pipe_context *pipe = st->pipe;
1767 struct st_variant *v;
1768 struct pipe_shader_state state = {0};
1769
1770 /* Search for existing variant */
1771 for (v = prog->variants; v; v = v->next) {
1772 if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
1773 break;
1774 }
1775
1776 if (!v) {
1777 /* create new */
1778 v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
1779 if (v) {
1780 if (prog->state.type == PIPE_SHADER_IR_NIR) {
1781 bool finalize = false;
1782
1783 state.type = PIPE_SHADER_IR_NIR;
1784 state.ir.nir = get_nir_shader(st, prog);
1785
1786 if (key->clamp_color) {
1787 NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1788 finalize = true;
1789 }
1790
1791 state.stream_output = prog->state.stream_output;
1792
1793 if (finalize || !st->allow_st_finalize_nir_twice) {
1794 st_finalize_nir(st, &prog->Base, prog->shader_program,
1795 state.ir.nir, true);
1796 }
1797
1798 if (ST_DEBUG & DEBUG_PRINT_IR)
1799 nir_print_shader(state.ir.nir, stderr);
1800 } else {
1801 if (key->lower_depth_clamp) {
1802 struct gl_program_parameter_list *params = prog->Base.Parameters;
1803
1804 unsigned depth_range_const =
1805 _mesa_add_state_reference(params, depth_range_state);
1806
1807 const struct tgsi_token *tokens;
1808 tokens =
1809 st_tgsi_lower_depth_clamp(prog->state.tokens,
1810 depth_range_const,
1811 key->clip_negative_one_to_one);
1812
1813 if (tokens != prog->state.tokens)
1814 tgsi_free_tokens(prog->state.tokens);
1815
1816 prog->state.tokens = tokens;
1817 }
1818 state = prog->state;
1819
1820 if (ST_DEBUG & DEBUG_PRINT_IR)
1821 tgsi_dump(state.tokens, 0);
1822 }
1823 /* fill in new variant */
1824 switch (prog->Base.info.stage) {
1825 case MESA_SHADER_TESS_CTRL:
1826 v->driver_shader = pipe->create_tcs_state(pipe, &state);
1827 break;
1828 case MESA_SHADER_TESS_EVAL:
1829 v->driver_shader = pipe->create_tes_state(pipe, &state);
1830 break;
1831 case MESA_SHADER_GEOMETRY:
1832 v->driver_shader = pipe->create_gs_state(pipe, &state);
1833 break;
1834 case MESA_SHADER_COMPUTE: {
1835 struct pipe_compute_state cs = {0};
1836 cs.ir_type = state.type;
1837 cs.req_local_mem = prog->Base.info.cs.shared_size;
1838
1839 if (state.type == PIPE_SHADER_IR_NIR)
1840 cs.prog = state.ir.nir;
1841 else
1842 cs.prog = state.tokens;
1843
1844 v->driver_shader = pipe->create_compute_state(pipe, &cs);
1845 break;
1846 }
1847 default:
1848 assert(!"unhandled shader type");
1849 free(v);
1850 return NULL;
1851 }
1852
1853 st_common_variant(v)->key = *key;
1854 v->st = key->st;
1855
1856 /* insert into list */
1857 v->next = prog->variants;
1858 prog->variants = v;
1859 }
1860 }
1861
1862 return v;
1863 }
1864
1865
1866 /**
1867  * Programs have per-context variants.  Free all the variants
1868  * attached to the given program which match the given context.
1869 */
1870 static void
1871 destroy_program_variants(struct st_context *st, struct gl_program *target)
1872 {
1873 if (!target || target == &_mesa_DummyProgram)
1874 return;
1875
1876 struct st_program *p = st_program(target);
1877 struct st_variant *v, **prevPtr = &p->variants;
1878 bool unbound = false;
1879
1880 for (v = p->variants; v; ) {
1881 struct st_variant *next = v->next;
1882 if (v->st == st) {
1883 if (!unbound) {
1884 st_unbind_program(st, p);
1885 unbound = true;
1886 }
1887
1888 /* unlink from list */
1889 *prevPtr = next;
1890 /* destroy this variant */
1891 delete_variant(st, v, target->Target);
1892 }
1893 else {
1894 prevPtr = &v->next;
1895 }
1896 v = next;
1897 }
1898 }
1899
1900
1901 /**
1902 * Callback for _mesa_HashWalk. Free all the shader's program variants
1903 * which match the given context.
1904 */
1905 static void
1906 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1907 {
1908 struct st_context *st = (struct st_context *) userData;
1909 struct gl_shader *shader = (struct gl_shader *) data;
1910
1911 switch (shader->Type) {
1912 case GL_SHADER_PROGRAM_MESA:
1913 {
1914 struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1915 GLuint i;
1916
1917 for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1918 if (shProg->_LinkedShaders[i])
1919 destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1920 }
1921 }
1922 break;
1923 case GL_VERTEX_SHADER:
1924 case GL_FRAGMENT_SHADER:
1925 case GL_GEOMETRY_SHADER:
1926 case GL_TESS_CONTROL_SHADER:
1927 case GL_TESS_EVALUATION_SHADER:
1928 case GL_COMPUTE_SHADER:
1929 break;
1930 default:
1931 assert(0);
1932 }
1933 }
1934
1935
1936 /**
1937 * Callback for _mesa_HashWalk. Free all the program variants which match
1938 * the given context.
1939 */
1940 static void
1941 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1942 {
1943 struct st_context *st = (struct st_context *) userData;
1944 struct gl_program *program = (struct gl_program *) data;
1945 destroy_program_variants(st, program);
1946 }
1947
1948
1949 /**
1950 * Walk over all shaders and programs to delete any variants which
1951 * belong to the given context.
1952 * This is called during context tear-down.
1953 */
1954 void
1955 st_destroy_program_variants(struct st_context *st)
1956 {
1957 /* If shaders can be shared with other contexts, the last context will
1958 * call DeleteProgram on all shaders, releasing everything.
1959 */
1960 if (st->has_shareable_shaders)
1961 return;
1962
1963 /* ARB vert/frag program */
1964 _mesa_HashWalk(st->ctx->Shared->Programs,
1965 destroy_program_variants_cb, st);
1966
1967 /* GLSL vert/frag/geom shaders */
1968 _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1969 destroy_shader_program_variants_cb, st);
1970 }
1971
1972
1973 /**
1974 * Compile one shader variant.
1975 */
1976 static void
1977 st_precompile_shader_variant(struct st_context *st,
1978 struct gl_program *prog)
1979 {
1980 switch (prog->Target) {
1981 case GL_VERTEX_PROGRAM_ARB: {
1982 struct st_program *p = (struct st_program *)prog;
1983 struct st_common_variant_key key;
1984
1985 memset(&key, 0, sizeof(key));
1986
1987 key.st = st->has_shareable_shaders ? NULL : st;
1988 st_get_vp_variant(st, p, &key);
1989 break;
1990 }
1991
1992 case GL_FRAGMENT_PROGRAM_ARB: {
1993 struct st_program *p = (struct st_program *)prog;
1994 struct st_fp_variant_key key;
1995
1996 memset(&key, 0, sizeof(key));
1997
1998 key.st = st->has_shareable_shaders ? NULL : st;
1999 st_get_fp_variant(st, p, &key);
2000 break;
2001 }
2002
2003 case GL_TESS_CONTROL_PROGRAM_NV:
2004 case GL_TESS_EVALUATION_PROGRAM_NV:
2005 case GL_GEOMETRY_PROGRAM_NV:
2006 case GL_COMPUTE_PROGRAM_NV: {
2007 struct st_program *p = st_program(prog);
2008 struct st_common_variant_key key;
2009
2010 memset(&key, 0, sizeof(key));
2011
2012 key.st = st->has_shareable_shaders ? NULL : st;
2013 st_get_common_variant(st, p, &key);
2014 break;
2015 }
2016
2017 default:
2018 assert(0);
2019 }
2020 }
2021
2022 void
2023 st_serialize_nir(struct st_program *stp)
2024 {
2025 if (!stp->serialized_nir) {
2026 struct blob blob;
2027 size_t size;
2028
2029 blob_init(&blob);
2030 nir_serialize(&blob, stp->Base.nir, false);
2031 blob_finish_get_buffer(&blob, &stp->serialized_nir, &size);
2032 stp->serialized_nir_size = size;
2033 }
2034 }
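
/*
 * Round-trip sketch: get_nir_shader() earlier in this file is the consumer
 * of this blob, rebuilding a shader for each additional variant:
 *
 *    struct blob_reader reader;
 *    blob_reader_init(&reader, stp->serialized_nir,
 *                     stp->serialized_nir_size);
 *    nir_shader *clone = nir_deserialize(NULL, options, &reader);
 */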
2035
2036 void
2037 st_finalize_program(struct st_context *st, struct gl_program *prog)
2038 {
2039 if (st->current_program[prog->info.stage] == prog) {
2040 if (prog->info.stage == MESA_SHADER_VERTEX)
2041 st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
2042 else
2043 st->dirty |= ((struct st_program *)prog)->affected_states;
2044 }
2045
2046 if (prog->nir) {
2047 nir_sweep(prog->nir);
2048
2049 /* This is only needed for ARB_vp/fp programs and when the disk cache
2050 * is disabled. If the disk cache is enabled, GLSL programs are
2051 * serialized in write_nir_to_cache.
2052 */
2053 st_serialize_nir(st_program(prog));
2054 }
2055
2056 /* Create Gallium shaders now instead of on demand. */
2057    if ((ST_DEBUG & DEBUG_PRECOMPILE) ||
2058 st->shader_has_one_variant[prog->info.stage])
2059 st_precompile_shader_variant(st, prog);
2060 }