src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35 #include "main/imports.h"
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44 #include "draw/draw_context.h"
  45
  46 #include "pipe/p_context.h"
  47 #include "pipe/p_defines.h"
  48 #include "pipe/p_shader_tokens.h"
  49 #include "draw/draw_context.h"
  50 #include "tgsi/tgsi_dump.h"
  51 #include "tgsi/tgsi_emulate.h"
  52 #include "tgsi/tgsi_parse.h"
  53 #include "tgsi/tgsi_ureg.h"
  54
  55 #include "st_debug.h"
  56 #include "st_cb_bitmap.h"
  57 #include "st_cb_drawpixels.h"
  58 #include "st_context.h"
  59 #include "st_tgsi_lower_depth_clamp.h"
  60 #include "st_tgsi_lower_yuv.h"
  61 #include "st_program.h"
  62 #include "st_mesa_to_tgsi.h"
  63 #include "st_atifs_to_tgsi.h"
  64 #include "st_nir.h"
  65 #include "st_shader_cache.h"
  66 #include "st_util.h"
  67 #include "cso_cache/cso_context.h"
  68
  69
  70
  71 static void
  72 set_affected_state_flags(uint64_t *states,
  73                          struct gl_program *prog,
  74                          uint64_t new_constants,
  75                          uint64_t new_sampler_views,
  76                          uint64_t new_samplers,
  77                          uint64_t new_images,
  78                          uint64_t new_ubos,
  79                          uint64_t new_ssbos,
  80                          uint64_t new_atomics)
  81 {
  82    if (prog->Parameters->NumParameters)
  83       *states |= new_constants;
  84
  85    if (prog->info.num_textures)
  86       *states |= new_sampler_views | new_samplers;
  87
  88    if (prog->info.num_images)
  89       *states |= new_images;
  90
  91    if (prog->info.num_ubos)
  92       *states |= new_ubos;
  93
  94    if (prog->info.num_ssbos)
  95       *states |= new_ssbos;
  96
  97    if (prog->info.num_abos)
  98       *states |= new_atomics;
  99 }
 100
 101 /**
 102  * This determines which states will be updated when the shader is bound.
 103  */
 104 void
 105 st_set_prog_affected_state_flags(struct gl_program *prog)
 106 {
 107    uint64_t *states;
 108
 109    switch (prog->info.stage) {
 110    case MESA_SHADER_VERTEX:
 111       states = &((struct st_program*)prog)->affected_states;
 112
 113       *states = ST_NEW_VS_STATE |
 114                 ST_NEW_RASTERIZER |
 115                 ST_NEW_VERTEX_ARRAYS;
 116
 117       set_affected_state_flags(states, prog,
 118                                ST_NEW_VS_CONSTANTS,
 119                                ST_NEW_VS_SAMPLER_VIEWS,
 120                                ST_NEW_VS_SAMPLERS,
 121                                ST_NEW_VS_IMAGES,
 122                                ST_NEW_VS_UBOS,
 123                                ST_NEW_VS_SSBOS,
 124                                ST_NEW_VS_ATOMICS);
 125       break;
 126
 127    case MESA_SHADER_TESS_CTRL:
 128       states = &(st_program(prog))->affected_states;
 129
 130       *states = ST_NEW_TCS_STATE;
 131
 132       set_affected_state_flags(states, prog,
 133                                ST_NEW_TCS_CONSTANTS,
 134                                ST_NEW_TCS_SAMPLER_VIEWS,
 135                                ST_NEW_TCS_SAMPLERS,
 136                                ST_NEW_TCS_IMAGES,
 137                                ST_NEW_TCS_UBOS,
 138                                ST_NEW_TCS_SSBOS,
 139                                ST_NEW_TCS_ATOMICS);
 140       break;
 141
 142    case MESA_SHADER_TESS_EVAL:
 143       states = &(st_program(prog))->affected_states;
 144
 145       *states = ST_NEW_TES_STATE |
 146                 ST_NEW_RASTERIZER;
 147
 148       set_affected_state_flags(states, prog,
 149                                ST_NEW_TES_CONSTANTS,
 150                                ST_NEW_TES_SAMPLER_VIEWS,
 151                                ST_NEW_TES_SAMPLERS,
 152                                ST_NEW_TES_IMAGES,
 153                                ST_NEW_TES_UBOS,
 154                                ST_NEW_TES_SSBOS,
 155                                ST_NEW_TES_ATOMICS);
 156       break;
 157
 158    case MESA_SHADER_GEOMETRY:
 159       states = &(st_program(prog))->affected_states;
 160
 161       *states = ST_NEW_GS_STATE |
 162                 ST_NEW_RASTERIZER;
 163
 164       set_affected_state_flags(states, prog,
 165                                ST_NEW_GS_CONSTANTS,
 166                                ST_NEW_GS_SAMPLER_VIEWS,
 167                                ST_NEW_GS_SAMPLERS,
 168                                ST_NEW_GS_IMAGES,
 169                                ST_NEW_GS_UBOS,
 170                                ST_NEW_GS_SSBOS,
 171                                ST_NEW_GS_ATOMICS);
 172       break;
 173
 174    case MESA_SHADER_FRAGMENT:
 175       states = &((struct st_program*)prog)->affected_states;
 176
 177       /* gl_FragCoord and glDrawPixels always use constants. */
 178       *states = ST_NEW_FS_STATE |
 179                 ST_NEW_SAMPLE_SHADING |
 180                 ST_NEW_FS_CONSTANTS;
 181
 182       set_affected_state_flags(states, prog,
 183                                ST_NEW_FS_CONSTANTS,
 184                                ST_NEW_FS_SAMPLER_VIEWS,
 185                                ST_NEW_FS_SAMPLERS,
 186                                ST_NEW_FS_IMAGES,
 187                                ST_NEW_FS_UBOS,
 188                                ST_NEW_FS_SSBOS,
 189                                ST_NEW_FS_ATOMICS);
 190       break;
 191
 192    case MESA_SHADER_COMPUTE:
 193       states = &((struct st_program*)prog)->affected_states;
 194
 195       *states = ST_NEW_CS_STATE;
 196
 197       set_affected_state_flags(states, prog,
 198                                ST_NEW_CS_CONSTANTS,
 199                                ST_NEW_CS_SAMPLER_VIEWS,
 200                                ST_NEW_CS_SAMPLERS,
 201                                ST_NEW_CS_IMAGES,
 202                                ST_NEW_CS_UBOS,
 203                                ST_NEW_CS_SSBOS,
 204                                ST_NEW_CS_ATOMICS);
 205       break;
 206
 207    default:
 208       unreachable("unhandled shader stage");
 209    }
 210 }
 211
 212
 213 /**
 214  * Delete a shader variant.  Note the caller must unlink the variant from
 215  * the linked list.
 216  */
 217 static void
 218 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
 219 {
 220    if (v->driver_shader) {
 221       if (st->has_shareable_shaders || v->st == st) {
 222          /* The shader's context matches the calling context, or we
 223           * don't care.
 224           */
 225          switch (target) {
 226          case GL_VERTEX_PROGRAM_ARB:
 227             cso_delete_vertex_shader(st->cso_context, v->driver_shader);
 228             break;
 229          case GL_TESS_CONTROL_PROGRAM_NV:
 230             cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
 231             break;
 232          case GL_TESS_EVALUATION_PROGRAM_NV:
 233             cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
 234             break;
 235          case GL_GEOMETRY_PROGRAM_NV:
 236             cso_delete_geometry_shader(st->cso_context, v->driver_shader);
 237             break;
 238          case GL_FRAGMENT_PROGRAM_ARB:
 239             cso_delete_fragment_shader(st->cso_context, v->driver_shader);
 240             break;
 241          case GL_COMPUTE_PROGRAM_NV:
 242             cso_delete_compute_shader(st->cso_context, v->driver_shader);
 243             break;
 244          default:
 245             unreachable("bad shader type in delete_basic_variant");
 246          }
 247       } else {
 248          /* We can't delete a shader with a context different from the one
 249           * that created it.  Add it to the creating context's zombie list.
 250           */
 251          enum pipe_shader_type type =
 252             pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
 253
 254          st_save_zombie_shader(v->st, type, v->driver_shader);
 255       }
 256    }
 257
 258    free(v);
 259 }
 260
 261
 262 /**
 263  * Free all basic program variants.
 264  */
 265 void
 266 st_release_variants(struct st_context *st, struct st_program *p)
 267 {
 268    struct st_variant *v;
 269
 270    for (v = p->variants; v; ) {
 271       struct st_variant *next = v->next;
 272       delete_variant(st, v, p->Base.Target);
 273       v = next;
 274    }
 275
 276    p->variants = NULL;
 277
 278    if (p->state.tokens) {
 279       ureg_free_tokens(p->state.tokens);
 280       p->state.tokens = NULL;
 281    }
 282
 283    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 284     * it has resulted in the driver taking ownership of the NIR.  Those
 285     * callers should be NULLing out the nir field in any pipe_shader_state
 286     * that might have this called in order to indicate that.
 287     *
 288     * GLSL IR and ARB programs will have set gl_program->nir to the same
 289     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 290     */
 291 }
 292
 293 void
 294 st_finalize_nir_before_variants(struct nir_shader *nir)
 295 {
 296    NIR_PASS_V(nir, nir_opt_access);
 297
 298    NIR_PASS_V(nir, nir_split_var_copies);
 299    NIR_PASS_V(nir, nir_lower_var_copies);
 300    if (nir->options->lower_all_io_to_temps ||
 301        nir->options->lower_all_io_to_elements ||
 302        nir->info.stage == MESA_SHADER_VERTEX ||
 303        nir->info.stage == MESA_SHADER_GEOMETRY) {
 304       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 305    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 306       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 307    }
 308
 309    st_nir_assign_vs_in_locations(nir);
 310 }
 311
 312 /**
 313  * Translate ARB (asm) program to NIR
 314  */
 315 static nir_shader *
 316 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 317                          gl_shader_stage stage)
 318 {
 319    struct pipe_screen *screen = st->pipe->screen;
 320    const struct gl_shader_compiler_options *options =
 321       &st->ctx->Const.ShaderCompilerOptions[stage];
 322
 323    /* Translate to NIR */
 324    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 325    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 326    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 327
 328    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
 329    NIR_PASS_V(nir, nir_lower_system_values);
 330
 331    /* Optimise NIR */
 332    NIR_PASS_V(nir, nir_opt_constant_folding);
 333    st_nir_opts(nir);
 334    st_finalize_nir_before_variants(nir);
 335
 336    if (st->allow_st_finalize_nir_twice)
 337       st_finalize_nir(st, prog, NULL, nir, true);
 338
 339    nir_validate_shader(nir, "after st/glsl finalize_nir");
 340
 341    return nir;
 342 }
 343
 344 void
 345 st_prepare_vertex_program(struct st_program *stp)
 346 {
 347    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 348
 349    stvp->num_inputs = 0;
 350    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 351    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 352
 353    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 354     * and TGSI generic input indexes, plus input attrib semantic info.
 355     */
 356    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 357       if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 358          stvp->input_to_index[attr] = stvp->num_inputs;
 359          stvp->index_to_input[stvp->num_inputs] = attr;
 360          stvp->num_inputs++;
 361
 362          if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 363             /* add placeholder for second part of a double attribute */
 364             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 365             stvp->num_inputs++;
 366          }
 367       }
 368    }
 369    /* pre-setup potentially unused edgeflag input */
 370    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 371    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 372
 373    /* Compute mapping of vertex program outputs to slots. */
 374    unsigned num_outputs = 0;
 375    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 376       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 377          stvp->result_to_output[attr] = num_outputs++;
 378    }
 379    /* pre-setup potentially unused edgeflag output */
 380    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 381 }
 382
 383 void
 384 st_translate_stream_output_info(struct gl_program *prog)
 385 {
 386    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 387    if (!info)
 388       return;
 389
 390    /* Determine the (default) output register mapping for each output. */
 391    unsigned num_outputs = 0;
 392    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 393    memset(output_mapping, 0, sizeof(output_mapping));
 394
 395    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 396       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 397          output_mapping[attr] = num_outputs++;
 398    }
 399
 400    /* Translate stream output info. */
 401    struct pipe_stream_output_info *so_info =
 402       &((struct st_program*)prog)->state.stream_output;
 403
 404    for (unsigned i = 0; i < info->NumOutputs; i++) {
 405       so_info->output[i].register_index =
 406          output_mapping[info->Outputs[i].OutputRegister];
 407       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 408       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 409       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 410       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 411       so_info->output[i].stream = info->Outputs[i].StreamId;
 412    }
 413
 414    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 415       so_info->stride[i] = info->Buffers[i].Stride;
 416    }
 417    so_info->num_outputs = info->NumOutputs;
 418 }
 419
 420 /**
 421  * Translate a vertex program.
 422  */
 423 bool
 424 st_translate_vertex_program(struct st_context *st,
 425                             struct st_program *stp)
 426 {
 427    struct ureg_program *ureg;
 428    enum pipe_error error;
 429    unsigned num_outputs = 0;
 430    unsigned attr;
 431    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 432    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 433
 434    if (stp->Base.arb.IsPositionInvariant)
 435       _mesa_insert_mvp_code(st->ctx, &stp->Base);
 436
 437    st_prepare_vertex_program(stp);
 438
 439    /* ARB_vp: */
 440    if (!stp->glsl_to_tgsi) {
 441       _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);
 442
 443       /* This determines which states will be updated when the assembly
 444        * shader is bound.
 445        */
 446       stp->affected_states = ST_NEW_VS_STATE |
 447                               ST_NEW_RASTERIZER |
 448                               ST_NEW_VERTEX_ARRAYS;
 449
 450       if (stp->Base.Parameters->NumParameters)
 451          stp->affected_states |= ST_NEW_VS_CONSTANTS;
 452
 453       /* No samplers are allowed in ARB_vp. */
 454    }
 455
 456    /* Get semantic names and indices. */
 457    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 458       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 459          unsigned slot = num_outputs++;
 460          unsigned semantic_name, semantic_index;
 461          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 462                                       &semantic_name, &semantic_index);
 463          output_semantic_name[slot] = semantic_name;
 464          output_semantic_index[slot] = semantic_index;
 465       }
 466    }
 467    /* pre-setup potentially unused edgeflag output */
 468    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 469    output_semantic_index[num_outputs] = 0;
 470
 471    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 472    if (ureg == NULL)
 473       return false;
 474
 475    if (stp->Base.info.clip_distance_array_size)
 476       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 477                     stp->Base.info.clip_distance_array_size);
 478    if (stp->Base.info.cull_distance_array_size)
 479       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 480                     stp->Base.info.cull_distance_array_size);
 481
 482    if (ST_DEBUG & DEBUG_MESA) {
 483       _mesa_print_program(&stp->Base);
 484       _mesa_print_program_parameters(st->ctx, &stp->Base);
 485       debug_printf("\n");
 486    }
 487
 488    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 489
 490    if (stp->glsl_to_tgsi) {
 491       error = st_translate_program(st->ctx,
 492                                    PIPE_SHADER_VERTEX,
 493                                    ureg,
 494                                    stp->glsl_to_tgsi,
 495                                    &stp->Base,
 496                                    /* inputs */
 497                                    stvp->num_inputs,
 498                                    stvp->input_to_index,
 499                                    NULL, /* inputSlotToAttr */
 500                                    NULL, /* input semantic name */
 501                                    NULL, /* input semantic index */
 502                                    NULL, /* interp mode */
 503                                    /* outputs */
 504                                    num_outputs,
 505                                    stvp->result_to_output,
 506                                    output_semantic_name,
 507                                    output_semantic_index);
 508
 509       st_translate_stream_output_info(&stp->Base);
 510
 511       free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
 512    } else
 513       error = st_translate_mesa_program(st->ctx,
 514                                         PIPE_SHADER_VERTEX,
 515                                         ureg,
 516                                         &stp->Base,
 517                                         /* inputs */
 518                                         stvp->num_inputs,
 519                                         stvp->input_to_index,
 520                                         NULL, /* input semantic name */
 521                                         NULL, /* input semantic index */
 522                                         NULL,
 523                                         /* outputs */
 524                                         num_outputs,
 525                                         stvp->result_to_output,
 526                                         output_semantic_name,
 527                                         output_semantic_index);
 528
 529    if (error) {
 530       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 531       _mesa_print_program(&stp->Base);
 532       debug_assert(0);
 533       return false;
 534    }
 535
 536    stp->state.tokens = ureg_get_tokens(ureg, NULL);
 537    ureg_destroy(ureg);
 538
 539    if (stp->glsl_to_tgsi) {
 540       stp->glsl_to_tgsi = NULL;
 541       st_store_ir_in_disk_cache(st, &stp->Base, false);
 542    }
 543
 544    /* Translate to NIR.
 545     *
 546     * This must be done after the translation to TGSI is done, because
 547     * we'll pass the NIR shader to the driver and the TGSI version to
 548     * the draw module for the select/feedback/rasterpos code.
 549     */
 550    if (st->pipe->screen->get_shader_param(st->pipe->screen,
 551                                           PIPE_SHADER_VERTEX,
 552                                           PIPE_SHADER_CAP_PREFERRED_IR)) {
 553       assert(!stp->glsl_to_tgsi);
 554
 555       nir_shader *nir =
 556          st_translate_prog_to_nir(st, &stp->Base, MESA_SHADER_VERTEX);
 557
 558       if (stp->Base.nir)
 559          ralloc_free(stp->Base.nir);
 560       stp->state.type = PIPE_SHADER_IR_NIR;
 561       stp->Base.nir = nir;
 562       return true;
 563    }
 564
 565    return stp->state.tokens != NULL;
 566 }
 567
 568 static const gl_state_index16 depth_range_state[STATE_LENGTH] =
 569    { STATE_DEPTH_RANGE };
 570
 571 static struct st_common_variant *
 572 st_create_vp_variant(struct st_context *st,
 573                      struct st_program *stvp,
 574                      const struct st_common_variant_key *key)
 575 {
 576    struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
 577    struct pipe_context *pipe = st->pipe;
 578    struct pipe_screen *screen = pipe->screen;
 579    struct pipe_shader_state state = {0};
 580
 581    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 582       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 583    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 584
 585    vpv->key = *key;
 586
 587    state.stream_output = stvp->state.stream_output;
 588
 589    if (stvp->state.type == PIPE_SHADER_IR_NIR) {
 590       bool finalize = false;
 591
 592       state.type = PIPE_SHADER_IR_NIR;
 593       state.ir.nir = nir_shader_clone(NULL, stvp->Base.nir);
 594       if (key->clamp_color) {
 595          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 596          finalize = true;
 597       }
 598       if (key->passthrough_edgeflags) {
 599          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 600          finalize = true;
 601       }
 602
 603       if (key->lower_point_size) {
 604          _mesa_add_state_reference(params, point_size_state);
 605          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 606                     point_size_state);
 607          finalize = true;
 608       }
 609
 610       if (key->lower_ucp) {
 611          bool can_compact = screen->get_param(screen,
 612                                               PIPE_CAP_NIR_COMPACT_ARRAYS);
 613
 614          bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 615          gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 616          for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 617             if (use_eye) {
 618                clipplane_state[i][0] = STATE_CLIPPLANE;
 619                clipplane_state[i][1] = i;
 620             } else {
 621                clipplane_state[i][0] = STATE_INTERNAL;
 622                clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 623                clipplane_state[i][2] = i;
 624             }
 625             _mesa_add_state_reference(params, clipplane_state[i]);
 626          }
 627
 628          NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
 629                     true, can_compact, clipplane_state);
 630          NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
 631                     nir_shader_get_entrypoint(state.ir.nir), true, false);
 632          NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
 633          finalize = true;
 634       }
 635
 636       if (finalize || !st->allow_st_finalize_nir_twice) {
 637          st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
 638                          true);
 639
 640          /* Some of the lowering above may have introduced new varyings */
 641          nir_shader_gather_info(state.ir.nir,
 642                                 nir_shader_get_entrypoint(state.ir.nir));
 643       }
 644
 645       if (ST_DEBUG & DEBUG_PRINT_IR)
 646          nir_print_shader(state.ir.nir, stderr);
 647
 648       if (key->is_draw_shader)
 649          vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 650       else
 651          vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 652
 653       return vpv;
 654    }
 655
 656    state.type = PIPE_SHADER_IR_TGSI;
 657    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 658
 659    /* Emulate features. */
 660    if (key->clamp_color || key->passthrough_edgeflags) {
 661       const struct tgsi_token *tokens;
 662       unsigned flags =
 663          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 664          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 665
 666       tokens = tgsi_emulate(state.tokens, flags);
 667
 668       if (tokens) {
 669          tgsi_free_tokens(state.tokens);
 670          state.tokens = tokens;
 671       } else {
 672          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 673       }
 674    }
 675
 676    if (key->lower_depth_clamp) {
 677       unsigned depth_range_const =
 678             _mesa_add_state_reference(params, depth_range_state);
 679
 680       const struct tgsi_token *tokens;
 681       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 682                                          key->clip_negative_one_to_one);
 683       if (tokens != state.tokens)
 684          tgsi_free_tokens(state.tokens);
 685       state.tokens = tokens;
 686    }
 687
 688    if (ST_DEBUG & DEBUG_PRINT_IR)
 689       tgsi_dump(state.tokens, 0);
 690
 691    if (key->is_draw_shader)
 692       vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 693    else
 694       vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 695
 696    return vpv;
 697 }
 698
 699
 700 /**
 701  * Find/create a vertex program variant.
 702  */
 703 struct st_common_variant *
 704 st_get_vp_variant(struct st_context *st,
 705                   struct st_program *stp,
 706                   const struct st_common_variant_key *key)
 707 {
 708    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 709    struct st_common_variant *vpv;
 710
 711    /* Search for existing variant */
 712    for (vpv = st_common_variant(stp->variants); vpv;
 713         vpv = st_common_variant(vpv->base.next)) {
 714       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 715          break;
 716       }
 717    }
 718
 719    if (!vpv) {
 720       /* create now */
 721       vpv = st_create_vp_variant(st, stp, key);
 722       if (vpv) {
 723          vpv->base.st = key->st;
 724
 725          unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
 726          for (unsigned index = 0; index < num_inputs; ++index) {
 727             unsigned attr = stvp->index_to_input[index];
 728             if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 729                continue;
 730             vpv->vert_attrib_mask |= 1u << attr;
 731          }
 732
 733          /* insert into list */
 734          vpv->base.next = stp->variants;
 735          stp->variants = &vpv->base;
 736       }
 737    }
 738
 739    return vpv;
 740 }
 741
 742
 743 /**
 744  * Translate a Mesa fragment shader into a TGSI shader.
 745  */
 746 bool
 747 st_translate_fragment_program(struct st_context *st,
 748                               struct st_program *stfp)
 749 {
 750    /* Non-GLSL programs: */
 751    if (!stfp->glsl_to_tgsi) {
 752       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 753       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 754          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 755
 756       /* This determines which states will be updated when the assembly
 757        * shader is bound.
 758        *
 759        * fragment.position and glDrawPixels always use constants.
 760        */
 761       stfp->affected_states = ST_NEW_FS_STATE |
 762                               ST_NEW_SAMPLE_SHADING |
 763                               ST_NEW_FS_CONSTANTS;
 764
 765       if (stfp->ati_fs) {
 766          /* Just set them for ATI_fs unconditionally. */
 767          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 768                                   ST_NEW_FS_SAMPLERS;
 769       } else {
 770          /* ARB_fp */
 771          if (stfp->Base.SamplersUsed)
 772             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 773                                      ST_NEW_FS_SAMPLERS;
 774       }
 775
 776       /* Translate to NIR. */
 777       if (!stfp->ati_fs &&
 778           st->pipe->screen->get_shader_param(st->pipe->screen,
 779                                              PIPE_SHADER_FRAGMENT,
 780                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 781          nir_shader *nir =
 782             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 783
 784          if (stfp->Base.nir)
 785             ralloc_free(stfp->Base.nir);
 786          stfp->state.type = PIPE_SHADER_IR_NIR;
 787          stfp->Base.nir = nir;
 788          return true;
 789       }
 790    }
 791
 792    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 793    ubyte inputMapping[VARYING_SLOT_MAX];
 794    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 795    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 796    GLuint attr;
 797    GLbitfield64 inputsRead;
 798    struct ureg_program *ureg;
 799
 800    GLboolean write_all = GL_FALSE;
 801
 802    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 803    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 804    uint fs_num_inputs = 0;
 805
 806    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 807    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 808    uint fs_num_outputs = 0;
 809
 810    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 811
 812    /*
 813     * Convert Mesa program inputs to TGSI input register semantics.
 814     */
 815    inputsRead = stfp->Base.info.inputs_read;
 816    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 817       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 818          const GLuint slot = fs_num_inputs++;
 819
 820          inputMapping[attr] = slot;
 821          inputSlotToAttr[slot] = attr;
 822
 823          switch (attr) {
 824          case VARYING_SLOT_POS:
 825             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 826             input_semantic_index[slot] = 0;
 827             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 828             break;
 829          case VARYING_SLOT_COL0:
 830             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 831             input_semantic_index[slot] = 0;
 832             interpMode[slot] = stfp->glsl_to_tgsi ?
 833                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 834             break;
 835          case VARYING_SLOT_COL1:
 836             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 837             input_semantic_index[slot] = 1;
 838             interpMode[slot] = stfp->glsl_to_tgsi ?
 839                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 840             break;
 841          case VARYING_SLOT_FOGC:
 842             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 843             input_semantic_index[slot] = 0;
 844             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 845             break;
 846          case VARYING_SLOT_FACE:
 847             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 848             input_semantic_index[slot] = 0;
 849             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 850             break;
 851          case VARYING_SLOT_PRIMITIVE_ID:
 852             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 853             input_semantic_index[slot] = 0;
 854             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 855             break;
 856          case VARYING_SLOT_LAYER:
 857             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 858             input_semantic_index[slot] = 0;
 859             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 860             break;
 861          case VARYING_SLOT_VIEWPORT:
 862             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 863             input_semantic_index[slot] = 0;
 864             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 865             break;
 866          case VARYING_SLOT_CLIP_DIST0:
 867             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 868             input_semantic_index[slot] = 0;
 869             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 870             break;
 871          case VARYING_SLOT_CLIP_DIST1:
 872             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 873             input_semantic_index[slot] = 1;
 874             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 875             break;
 876          case VARYING_SLOT_CULL_DIST0:
 877          case VARYING_SLOT_CULL_DIST1:
 878             /* these should have been lowered by GLSL */
 879             assert(0);
 880             break;
 881             /* In most cases, there is nothing special about these
 882              * inputs, so adopt a convention to use the generic
 883              * semantic name and the mesa VARYING_SLOT_ number as the
 884              * index.
 885              *
 886              * All that is required is that the vertex shader labels
 887              * its own outputs similarly, and that the vertex shader
 888              * generates at least every output required by the
 889              * fragment shader plus fixed-function hardware (such as
 890              * BFC).
 891              *
 892              * However, some drivers may need us to identify the PNTC and TEXi
 893              * varyings if, for example, their capability to replace them with
 894              * sprite coordinates is limited.
 895              */
 896          case VARYING_SLOT_PNTC:
 897             if (st->needs_texcoord_semantic) {
 898                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
 899                input_semantic_index[slot] = 0;
 900                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 901                break;
 902             }
 903             /* fall through */
 904          case VARYING_SLOT_TEX0:
 905          case VARYING_SLOT_TEX1:
 906          case VARYING_SLOT_TEX2:
 907          case VARYING_SLOT_TEX3:
 908          case VARYING_SLOT_TEX4:
 909          case VARYING_SLOT_TEX5:
 910          case VARYING_SLOT_TEX6:
 911          case VARYING_SLOT_TEX7:
 912             if (st->needs_texcoord_semantic) {
 913                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
 914                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
 915                interpMode[slot] = stfp->glsl_to_tgsi ?
 916                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 917                break;
 918             }
 919             /* fall through */
 920          case VARYING_SLOT_VAR0:
 921          default:
 922             /* Semantic indices should be zero-based because drivers may choose
 923              * to assign a fixed slot determined by that index.
 924              * This is useful because ARB_separate_shader_objects uses location
 925              * qualifiers for linkage, and if the semantic index corresponds to
 926              * these locations, linkage passes in the driver become unnecessary.
 927              *
 928              * If needs_texcoord_semantic is true, no semantic indices will be
 929              * consumed for the TEXi varyings, and we can base the locations of
 930              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
 931              */
 932             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
 933                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
 934             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
 935             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
 936             if (attr == VARYING_SLOT_PNTC)
 937                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 938             else {
 939                interpMode[slot] = stfp->glsl_to_tgsi ?
 940                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 941             }
 942             break;
 943          }
 944       }
 945       else {
 946          inputMapping[attr] = -1;
 947       }
 948    }
 949
 950    /*
 951     * Semantics and mapping for outputs
 952     */
 953    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
 954
 955    /* if z is written, emit that first */
 956    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
 957       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
 958       fs_output_semantic_index[fs_num_outputs] = 0;
 959       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
 960       fs_num_outputs++;
 961       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
 962    }
 963
 964    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
 965       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
 966       fs_output_semantic_index[fs_num_outputs] = 0;
 967       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
 968       fs_num_outputs++;
 969       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
 970    }
 971
 972    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
 973       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
 974       fs_output_semantic_index[fs_num_outputs] = 0;
 975       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
 976       fs_num_outputs++;
 977       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
 978    }
 979
 980    /* handle remaining outputs (color) */
 981    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
 982       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
 983          stfp->Base.SecondaryOutputsWritten;
 984       const unsigned loc = attr % FRAG_RESULT_MAX;
 985
 986       if (written & BITFIELD64_BIT(loc)) {
 987          switch (loc) {
 988          case FRAG_RESULT_DEPTH:
 989          case FRAG_RESULT_STENCIL:
 990          case FRAG_RESULT_SAMPLE_MASK:
 991             /* handled above */
 992             assert(0);
 993             break;
 994          case FRAG_RESULT_COLOR:
 995             write_all = GL_TRUE; /* fallthrough */
 996          default: {
 997             int index;
 998             assert(loc == FRAG_RESULT_COLOR ||
 999                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1000
1001             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1002
1003             if (attr >= FRAG_RESULT_MAX) {
1004                /* Secondary color for dual source blending. */
1005                assert(index == 0);
1006                index++;
1007             }
1008
1009             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1010             fs_output_semantic_index[fs_num_outputs] = index;
1011             outputMapping[attr] = fs_num_outputs;
1012             break;
1013          }
1014          }
1015
1016          fs_num_outputs++;
1017       }
1018    }
1019
1020    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1021    if (ureg == NULL)
1022       return false;
1023
1024    if (ST_DEBUG & DEBUG_MESA) {
1025       _mesa_print_program(&stfp->Base);
1026       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1027       debug_printf("\n");
1028    }
1029    if (write_all == GL_TRUE)
1030       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1031
1032    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1033       switch (stfp->Base.info.fs.depth_layout) {
1034       case FRAG_DEPTH_LAYOUT_ANY:
1035          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1036                        TGSI_FS_DEPTH_LAYOUT_ANY);
1037          break;
1038       case FRAG_DEPTH_LAYOUT_GREATER:
1039          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1040                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1041          break;
1042       case FRAG_DEPTH_LAYOUT_LESS:
1043          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1044                        TGSI_FS_DEPTH_LAYOUT_LESS);
1045          break;
1046       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1047          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1048                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1049          break;
1050       default:
1051          assert(0);
1052       }
1053    }
1054
1055    if (stfp->glsl_to_tgsi) {
1056       st_translate_program(st->ctx,
1057                            PIPE_SHADER_FRAGMENT,
1058                            ureg,
1059                            stfp->glsl_to_tgsi,
1060                            &stfp->Base,
1061                            /* inputs */
1062                            fs_num_inputs,
1063                            inputMapping,
1064                            inputSlotToAttr,
1065                            input_semantic_name,
1066                            input_semantic_index,
1067                            interpMode,
1068                            /* outputs */
1069                            fs_num_outputs,
1070                            outputMapping,
1071                            fs_output_semantic_name,
1072                            fs_output_semantic_index);
1073
1074       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1075    } else if (stfp->ati_fs)
1076       st_translate_atifs_program(ureg,
1077                                  stfp->ati_fs,
1078                                  &stfp->Base,
1079                                  /* inputs */
1080                                  fs_num_inputs,
1081                                  inputMapping,
1082                                  input_semantic_name,
1083                                  input_semantic_index,
1084                                  interpMode,
1085                                  /* outputs */
1086                                  fs_num_outputs,
1087                                  outputMapping,
1088                                  fs_output_semantic_name,
1089                                  fs_output_semantic_index);
1090    else
1091       st_translate_mesa_program(st->ctx,
1092                                 PIPE_SHADER_FRAGMENT,
1093                                 ureg,
1094                                 &stfp->Base,
1095                                 /* inputs */
1096                                 fs_num_inputs,
1097                                 inputMapping,
1098                                 input_semantic_name,
1099                                 input_semantic_index,
1100                                 interpMode,
1101                                 /* outputs */
1102                                 fs_num_outputs,
1103                                 outputMapping,
1104                                 fs_output_semantic_name,
1105                                 fs_output_semantic_index);
1106
1107    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1108    ureg_destroy(ureg);
1109
1110    if (stfp->glsl_to_tgsi) {
1111       stfp->glsl_to_tgsi = NULL;
1112       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1113    }
1114
1115    return stfp->state.tokens != NULL;
1116 }
1117
1118 static struct st_fp_variant *
1119 st_create_fp_variant(struct st_context *st,
1120                      struct st_program *stfp,
1121                      const struct st_fp_variant_key *key)
1122 {
1123    struct pipe_context *pipe = st->pipe;
1124    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1125    struct pipe_shader_state state = {0};
1126    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1127    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1128       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1129    static const gl_state_index16 scale_state[STATE_LENGTH] =
1130       { STATE_INTERNAL, STATE_PT_SCALE };
1131    static const gl_state_index16 bias_state[STATE_LENGTH] =
1132       { STATE_INTERNAL, STATE_PT_BIAS };
1133    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1134       { STATE_INTERNAL, STATE_ALPHA_REF };
1135
1136    if (!variant)
1137       return NULL;
1138
1139    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1140       bool finalize = false;
1141
1142       state.type = PIPE_SHADER_IR_NIR;
1143       state.ir.nir = nir_shader_clone(NULL, stfp->Base.nir);
1144
1145       if (key->clamp_color) {
1146          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1147          finalize = true;
1148       }
1149
1150       if (key->lower_flatshade) {
1151          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1152          finalize = true;
1153       }
1154
1155       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1156          _mesa_add_state_reference(params, alpha_ref_state);
1157          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1158                     false, alpha_ref_state);
1159          finalize = true;
1160       }
1161
1162       if (key->lower_two_sided_color) {
1163          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
1164          finalize = true;
1165       }
1166
1167       if (key->persample_shading) {
1168           nir_shader *shader = state.ir.nir;
1169           nir_foreach_variable(var, &shader->inputs)
1170              var->data.sample = true;
1171           finalize = true;
1172       }
1173
1174       assert(!(key->bitmap && key->drawpixels));
1175
1176       /* glBitmap */
1177       if (key->bitmap) {
1178          nir_lower_bitmap_options options = {0};
1179
1180          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1181          options.sampler = variant->bitmap_sampler;
1182          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1183
1184          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1185          finalize = true;
1186       }
1187
1188       /* glDrawPixels (color only) */
1189       if (key->drawpixels) {
1190          nir_lower_drawpixels_options options = {{0}};
1191          unsigned samplers_used = stfp->Base.SamplersUsed;
1192
1193          /* Find the first unused slot. */
1194          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1195          options.drawpix_sampler = variant->drawpix_sampler;
1196          samplers_used |= (1 << variant->drawpix_sampler);
1197
1198          options.pixel_maps = key->pixelMaps;
1199          if (key->pixelMaps) {
1200             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1201             options.pixelmap_sampler = variant->pixelmap_sampler;
1202          }
1203
1204          options.scale_and_bias = key->scaleAndBias;
1205          if (key->scaleAndBias) {
1206             _mesa_add_state_reference(params, scale_state);
1207             memcpy(options.scale_state_tokens, scale_state,
1208                    sizeof(options.scale_state_tokens));
1209             _mesa_add_state_reference(params, bias_state);
1210             memcpy(options.bias_state_tokens, bias_state,
1211                    sizeof(options.bias_state_tokens));
1212          }
1213
1214          _mesa_add_state_reference(params, texcoord_state);
1215          memcpy(options.texcoord_state_tokens, texcoord_state,
1216                 sizeof(options.texcoord_state_tokens));
1217
1218          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1219          finalize = true;
1220       }
1221
1222       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1223                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1224                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1225          nir_lower_tex_options options = {0};
1226          options.lower_y_uv_external = key->external.lower_nv12;
1227          options.lower_y_u_v_external = key->external.lower_iyuv;
1228          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1229          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1230          options.lower_ayuv_external = key->external.lower_ayuv;
1231          options.lower_xyuv_external = key->external.lower_xyuv;
1232          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1233          finalize = true;
1234       }
1235
1236       if (finalize || !st->allow_st_finalize_nir_twice) {
1237          st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
1238                          false);
1239       }
1240
1241       /* This pass needs to happen *after* nir_lower_sampler */
1242       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1243                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1244          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1245                     ~stfp->Base.SamplersUsed,
1246                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1247                        key->external.lower_yx_xuxv,
1248                     key->external.lower_iyuv);
1249          finalize = true;
1250       }
1251
1252       if (finalize || !st->allow_st_finalize_nir_twice) {
1253          /* Some of the lowering above may have introduced new varyings */
1254          nir_shader_gather_info(state.ir.nir,
1255                                 nir_shader_get_entrypoint(state.ir.nir));
1256
1257          struct pipe_screen *screen = pipe->screen;
1258          if (screen->finalize_nir)
1259             screen->finalize_nir(screen, state.ir.nir, false);
1260       }
1261
1262       if (ST_DEBUG & DEBUG_PRINT_IR)
1263          nir_print_shader(state.ir.nir, stderr);
1264
1265       variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1266       variant->key = *key;
1267
1268       return variant;
1269    }
1270
1271    state.tokens = stfp->state.tokens;
1272
1273    assert(!(key->bitmap && key->drawpixels));
1274
1275    /* Fix texture targets and add fog for ATI_fs */
1276    if (stfp->ati_fs) {
1277       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1278
1279       if (tokens)
1280          state.tokens = tokens;
1281       else
1282          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1283    }
1284
1285    /* Emulate features. */
1286    if (key->clamp_color || key->persample_shading) {
1287       const struct tgsi_token *tokens;
1288       unsigned flags =
1289          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1290          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1291
1292       tokens = tgsi_emulate(state.tokens, flags);
1293
1294       if (tokens) {
1295          if (state.tokens != stfp->state.tokens)
1296             tgsi_free_tokens(state.tokens);
1297          state.tokens = tokens;
1298       } else
1299          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1300    }
1301
1302    /* glBitmap */
1303    if (key->bitmap) {
1304       const struct tgsi_token *tokens;
1305
1306       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1307
1308       tokens = st_get_bitmap_shader(state.tokens,
1309                                     st->internal_target,
1310                                     variant->bitmap_sampler,
1311                                     st->needs_texcoord_semantic,
1312                                     st->bitmap.tex_format ==
1313                                     PIPE_FORMAT_R8_UNORM);
1314
1315       if (tokens) {
1316          if (state.tokens != stfp->state.tokens)
1317             tgsi_free_tokens(state.tokens);
1318          state.tokens = tokens;
1319       } else
1320          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1321    }
1322
1323    /* glDrawPixels (color only) */
1324    if (key->drawpixels) {
1325       const struct tgsi_token *tokens;
1326       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1327
1328       /* Find the first unused slot. */
1329       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1330
1331       if (key->pixelMaps) {
1332          unsigned samplers_used = stfp->Base.SamplersUsed |
1333                                   (1 << variant->drawpix_sampler);
1334
1335          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1336       }
1337
1338       if (key->scaleAndBias) {
1339          scale_const = _mesa_add_state_reference(params, scale_state);
1340          bias_const = _mesa_add_state_reference(params, bias_state);
1341       }
1342
1343       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1344
1345       tokens = st_get_drawpix_shader(state.tokens,
1346                                      st->needs_texcoord_semantic,
1347                                      key->scaleAndBias, scale_const,
1348                                      bias_const, key->pixelMaps,
1349                                      variant->drawpix_sampler,
1350                                      variant->pixelmap_sampler,
1351                                      texcoord_const, st->internal_target);
1352
1353       if (tokens) {
1354          if (state.tokens != stfp->state.tokens)
1355             tgsi_free_tokens(state.tokens);
1356          state.tokens = tokens;
1357       } else
1358          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1359    }
1360
1361    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1362                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1363       const struct tgsi_token *tokens;
1364
1365       /* samplers inserted would conflict, but this should be unpossible: */
1366       assert(!(key->bitmap || key->drawpixels));
1367
1368       tokens = st_tgsi_lower_yuv(state.tokens,
1369                                  ~stfp->Base.SamplersUsed,
1370                                  key->external.lower_nv12 ||
1371                                     key->external.lower_xy_uxvx ||
1372                                     key->external.lower_yx_xuxv,
1373                                  key->external.lower_iyuv);
1374       if (tokens) {
1375          if (state.tokens != stfp->state.tokens)
1376             tgsi_free_tokens(state.tokens);
1377          state.tokens = tokens;
1378       } else {
1379          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1380       }
1381    }
1382
1383    if (key->lower_depth_clamp) {
1384       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1385
1386       const struct tgsi_token *tokens;
1387       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1388       if (state.tokens != stfp->state.tokens)
1389          tgsi_free_tokens(state.tokens);
1390       state.tokens = tokens;
1391    }
1392
1393    if (ST_DEBUG & DEBUG_PRINT_IR)
1394       tgsi_dump(state.tokens, 0);
1395
1396    /* fill in variant */
1397    variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1398    variant->key = *key;
1399
1400    if (state.tokens != stfp->state.tokens)
1401       tgsi_free_tokens(state.tokens);
1402    return variant;
1403 }
1404
1405 /**
1406  * Translate fragment program if needed.
1407  */
1408 struct st_fp_variant *
1409 st_get_fp_variant(struct st_context *st,
1410                   struct st_program *stfp,
1411                   const struct st_fp_variant_key *key)
1412 {
1413    struct st_fp_variant *fpv;
1414
1415    /* Search for existing variant */
1416    for (fpv = st_fp_variant(stfp->variants); fpv;
1417         fpv = st_fp_variant(fpv->base.next)) {
1418       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1419          break;
1420       }
1421    }
1422
1423    if (!fpv) {
1424       /* create new */
1425       fpv = st_create_fp_variant(st, stfp, key);
1426       if (fpv) {
1427          fpv->base.st = key->st;
1428
1429          if (key->bitmap || key->drawpixels) {
1430             /* Regular variants should always come before the
1431              * bitmap & drawpixels variants, (unless there
1432              * are no regular variants) so that
1433              * st_update_fp can take a fast path when
1434              * shader_has_one_variant is set.
1435              */
1436             if (!stfp->variants) {
1437                stfp->variants = &fpv->base;
1438             } else {
1439                /* insert into list after the first one */
1440                fpv->base.next = stfp->variants->next;
1441                stfp->variants->next = &fpv->base;
1442             }
1443          } else {
1444             /* insert into list */
1445             fpv->base.next = stfp->variants;
1446             stfp->variants = &fpv->base;
1447          }
1448       }
1449    }
1450
1451    return fpv;
1452 }
1453
1454 /**
1455  * Translate a program. This is common code for geometry and tessellation
1456  * shaders.
1457  */
1458 bool
1459 st_translate_common_program(struct st_context *st,
1460                             struct st_program *stp)
1461 {
1462    struct gl_program *prog = &stp->Base;
1463    enum pipe_shader_type stage =
1464       pipe_shader_type_from_mesa(stp->Base.info.stage);
1465    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1466
1467    if (ureg == NULL)
1468       return false;
1469
1470    switch (stage) {
1471    case PIPE_SHADER_TESS_CTRL:
1472       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1473                     stp->Base.info.tess.tcs_vertices_out);
1474       break;
1475
1476    case PIPE_SHADER_TESS_EVAL:
1477       if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
1478          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1479       else
1480          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1481                        stp->Base.info.tess.primitive_mode);
1482
1483       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1484       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1485                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1486       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1487                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1488
1489       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1490                     (stp->Base.info.tess.spacing + 1) % 3);
1491
1492       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1493                     !stp->Base.info.tess.ccw);
1494       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1495                     stp->Base.info.tess.point_mode);
1496       break;
1497
1498    case PIPE_SHADER_GEOMETRY:
1499       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1500                     stp->Base.info.gs.input_primitive);
1501       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1502                     stp->Base.info.gs.output_primitive);
1503       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1504                     stp->Base.info.gs.vertices_out);
1505       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1506                     stp->Base.info.gs.invocations);
1507       break;
1508
1509    default:
1510       break;
1511    }
1512
1513    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1514    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1515    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1516    GLuint attr;
1517
1518    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1519    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1520    uint num_inputs = 0;
1521
1522    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1523    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1524    uint num_outputs = 0;
1525
1526    GLint i;
1527
1528    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1529    memset(inputMapping, 0, sizeof(inputMapping));
1530    memset(outputMapping, 0, sizeof(outputMapping));
1531    memset(&stp->state, 0, sizeof(stp->state));
1532
1533    if (prog->info.clip_distance_array_size)
1534       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1535                     prog->info.clip_distance_array_size);
1536    if (prog->info.cull_distance_array_size)
1537       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1538                     prog->info.cull_distance_array_size);
1539
1540    /*
1541     * Convert Mesa program inputs to TGSI input register semantics.
1542     */
1543    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1544       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1545          continue;
1546
1547       unsigned slot = num_inputs++;
1548
1549       inputMapping[attr] = slot;
1550       inputSlotToAttr[slot] = attr;
1551
1552       unsigned semantic_name, semantic_index;
1553       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1554                                    &semantic_name, &semantic_index);
1555       input_semantic_name[slot] = semantic_name;
1556       input_semantic_index[slot] = semantic_index;
1557    }
1558
1559    /* Also add patch inputs. */
1560    for (attr = 0; attr < 32; attr++) {
1561       if (prog->info.patch_inputs_read & (1u << attr)) {
1562          GLuint slot = num_inputs++;
1563          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1564
1565          inputMapping[patch_attr] = slot;
1566          inputSlotToAttr[slot] = patch_attr;
1567          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1568          input_semantic_index[slot] = attr;
1569       }
1570    }
1571
1572    /* initialize output semantics to defaults */
1573    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1574       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1575       output_semantic_index[i] = 0;
1576    }
1577
1578    /*
1579     * Determine number of outputs, the (default) output register
1580     * mapping and the semantic information for each output.
1581     */
1582    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1583       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1584          GLuint slot = num_outputs++;
1585
1586          outputMapping[attr] = slot;
1587
1588          unsigned semantic_name, semantic_index;
1589          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1590                                       &semantic_name, &semantic_index);
1591          output_semantic_name[slot] = semantic_name;
1592          output_semantic_index[slot] = semantic_index;
1593       }
1594    }
1595
1596    /* Also add patch outputs. */
1597    for (attr = 0; attr < 32; attr++) {
1598       if (prog->info.patch_outputs_written & (1u << attr)) {
1599          GLuint slot = num_outputs++;
1600          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1601
1602          outputMapping[patch_attr] = slot;
1603          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1604          output_semantic_index[slot] = attr;
1605       }
1606    }
1607
1608    st_translate_program(st->ctx,
1609                         stage,
1610                         ureg,
1611                         stp->glsl_to_tgsi,
1612                         prog,
1613                         /* inputs */
1614                         num_inputs,
1615                         inputMapping,
1616                         inputSlotToAttr,
1617                         input_semantic_name,
1618                         input_semantic_index,
1619                         NULL,
1620                         /* outputs */
1621                         num_outputs,
1622                         outputMapping,
1623                         output_semantic_name,
1624                         output_semantic_index);
1625
1626    stp->state.tokens = ureg_get_tokens(ureg, NULL);
1627
1628    ureg_destroy(ureg);
1629
1630    st_translate_stream_output_info(prog);
1631
1632    st_store_ir_in_disk_cache(st, prog, false);
1633
1634    if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1635       _mesa_print_program(prog);
1636
1637    free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1638    stp->glsl_to_tgsi = NULL;
1639    return true;
1640 }
1641
1642
1643 /**
1644  * Get/create a basic program variant.
1645  */
1646 struct st_variant *
1647 st_get_common_variant(struct st_context *st,
1648                       struct st_program *prog,
1649                       const struct st_common_variant_key *key)
1650 {
1651    struct pipe_context *pipe = st->pipe;
1652    struct st_variant *v;
1653    struct pipe_shader_state state = {0};
1654
1655    /* Search for existing variant */
1656    for (v = prog->variants; v; v = v->next) {
1657       if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
1658          break;
1659    }
1660
1661    if (!v) {
1662       /* create new */
1663       v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
1664       if (v) {
1665          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1666             bool finalize = false;
1667
1668             state.type = PIPE_SHADER_IR_NIR;
1669             state.ir.nir = nir_shader_clone(NULL, prog->Base.nir);
1670
1671             if (key->clamp_color) {
1672                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1673                finalize = true;
1674             }
1675
1676             state.stream_output = prog->state.stream_output;
1677
1678             if (finalize || !st->allow_st_finalize_nir_twice) {
1679                st_finalize_nir(st, &prog->Base, prog->shader_program,
1680                                state.ir.nir, true);
1681             }
1682
1683             if (ST_DEBUG & DEBUG_PRINT_IR)
1684                nir_print_shader(state.ir.nir, stderr);
1685          } else {
1686             if (key->lower_depth_clamp) {
1687                struct gl_program_parameter_list *params = prog->Base.Parameters;
1688
1689                unsigned depth_range_const =
1690                      _mesa_add_state_reference(params, depth_range_state);
1691
1692                const struct tgsi_token *tokens;
1693                tokens =
1694                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1695                                                depth_range_const,
1696                                                key->clip_negative_one_to_one);
1697
1698                if (tokens != prog->state.tokens)
1699                   tgsi_free_tokens(prog->state.tokens);
1700
1701                prog->state.tokens = tokens;
1702             }
1703             state = prog->state;
1704
1705             if (ST_DEBUG & DEBUG_PRINT_IR)
1706                tgsi_dump(state.tokens, 0);
1707          }
1708          /* fill in new variant */
1709          switch (prog->Base.info.stage) {
1710          case MESA_SHADER_TESS_CTRL:
1711             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1712             break;
1713          case MESA_SHADER_TESS_EVAL:
1714             v->driver_shader = pipe->create_tes_state(pipe, &state);
1715             break;
1716          case MESA_SHADER_GEOMETRY:
1717             v->driver_shader = pipe->create_gs_state(pipe, &state);
1718             break;
1719          case MESA_SHADER_COMPUTE: {
1720             struct pipe_compute_state cs = {0};
1721             cs.ir_type = state.type;
1722             cs.req_local_mem = prog->Base.info.cs.shared_size;
1723
1724             if (state.type == PIPE_SHADER_IR_NIR)
1725                cs.prog = state.ir.nir;
1726             else
1727                cs.prog = state.tokens;
1728
1729             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1730             break;
1731          }
1732          default:
1733             assert(!"unhandled shader type");
1734             free(v);
1735             return NULL;
1736          }
1737
1738          st_common_variant(v)->key = *key;
1739          v->st = key->st;
1740
1741          /* insert into list */
1742          v->next = prog->variants;
1743          prog->variants = v;
1744       }
1745    }
1746
1747    return v;
1748 }
1749
1750
1751 /**
1752  * Vert/Geom/Frag programs have per-context variants.  Free all the
1753  * variants attached to the given program which match the given context.
1754  */
1755 static void
1756 destroy_program_variants(struct st_context *st, struct gl_program *target)
1757 {
1758    if (!target || target == &_mesa_DummyProgram)
1759       return;
1760
1761    struct st_program *p = st_program(target);
1762    struct st_variant *v, **prevPtr = &p->variants;
1763
1764    for (v = p->variants; v; ) {
1765       struct st_variant *next = v->next;
1766       if (v->st == st) {
1767          /* unlink from list */
1768          *prevPtr = next;
1769          /* destroy this variant */
1770          delete_variant(st, v, target->Target);
1771       }
1772       else {
1773          prevPtr = &v->next;
1774       }
1775       v = next;
1776    }
1777 }
1778
1779
1780 /**
1781  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1782  * which match the given context.
1783  */
1784 static void
1785 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1786 {
1787    struct st_context *st = (struct st_context *) userData;
1788    struct gl_shader *shader = (struct gl_shader *) data;
1789
1790    switch (shader->Type) {
1791    case GL_SHADER_PROGRAM_MESA:
1792       {
1793          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1794          GLuint i;
1795
1796          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1797             if (shProg->_LinkedShaders[i])
1798                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1799          }
1800       }
1801       break;
1802    case GL_VERTEX_SHADER:
1803    case GL_FRAGMENT_SHADER:
1804    case GL_GEOMETRY_SHADER:
1805    case GL_TESS_CONTROL_SHADER:
1806    case GL_TESS_EVALUATION_SHADER:
1807    case GL_COMPUTE_SHADER:
1808       break;
1809    default:
1810       assert(0);
1811    }
1812 }
1813
1814
1815 /**
1816  * Callback for _mesa_HashWalk.  Free all the program variants which match
1817  * the given context.
1818  */
1819 static void
1820 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1821 {
1822    struct st_context *st = (struct st_context *) userData;
1823    struct gl_program *program = (struct gl_program *) data;
1824    destroy_program_variants(st, program);
1825 }
1826
1827
1828 /**
1829  * Walk over all shaders and programs to delete any variants which
1830  * belong to the given context.
1831  * This is called during context tear-down.
1832  */
1833 void
1834 st_destroy_program_variants(struct st_context *st)
1835 {
1836    /* If shaders can be shared with other contexts, the last context will
1837     * call DeleteProgram on all shaders, releasing everything.
1838     */
1839    if (st->has_shareable_shaders)
1840       return;
1841
1842    /* ARB vert/frag program */
1843    _mesa_HashWalk(st->ctx->Shared->Programs,
1844                   destroy_program_variants_cb, st);
1845
1846    /* GLSL vert/frag/geom shaders */
1847    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1848                   destroy_shader_program_variants_cb, st);
1849 }
1850
1851
1852 /**
1853  * Compile one shader variant.
1854  */
1855 static void
1856 st_precompile_shader_variant(struct st_context *st,
1857                              struct gl_program *prog)
1858 {
1859    switch (prog->Target) {
1860    case GL_VERTEX_PROGRAM_ARB: {
1861       struct st_program *p = (struct st_program *)prog;
1862       struct st_common_variant_key key;
1863
1864       memset(&key, 0, sizeof(key));
1865
1866       key.st = st->has_shareable_shaders ? NULL : st;
1867       st_get_vp_variant(st, p, &key);
1868       break;
1869    }
1870
1871    case GL_FRAGMENT_PROGRAM_ARB: {
1872       struct st_program *p = (struct st_program *)prog;
1873       struct st_fp_variant_key key;
1874
1875       memset(&key, 0, sizeof(key));
1876
1877       key.st = st->has_shareable_shaders ? NULL : st;
1878       st_get_fp_variant(st, p, &key);
1879       break;
1880    }
1881
1882    case GL_TESS_CONTROL_PROGRAM_NV:
1883    case GL_TESS_EVALUATION_PROGRAM_NV:
1884    case GL_GEOMETRY_PROGRAM_NV:
1885    case GL_COMPUTE_PROGRAM_NV: {
1886       struct st_program *p = st_program(prog);
1887       struct st_common_variant_key key;
1888
1889       memset(&key, 0, sizeof(key));
1890
1891       key.st = st->has_shareable_shaders ? NULL : st;
1892       st_get_common_variant(st, p, &key);
1893       break;
1894    }
1895
1896    default:
1897       assert(0);
1898    }
1899 }
1900
1901 void
1902 st_finalize_program(struct st_context *st, struct gl_program *prog)
1903 {
1904    if (st->current_program[prog->info.stage] == prog) {
1905       if (prog->info.stage == MESA_SHADER_VERTEX)
1906          st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
1907       else
1908          st->dirty |= ((struct st_program *)prog)->affected_states;
1909    }
1910
1911    if (prog->nir)
1912       nir_sweep(prog->nir);
1913
1914    /* Create Gallium shaders now instead of on demand. */
1915    if (ST_DEBUG & DEBUG_PRECOMPILE ||
1916        st->shader_has_one_variant[prog->info.stage])
1917       st_precompile_shader_variant(st, prog);
1918 }