src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35 #include "main/imports.h"
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44 #include "draw/draw_context.h"
  45
  46 #include "pipe/p_context.h"
  47 #include "pipe/p_defines.h"
  48 #include "pipe/p_shader_tokens.h"
  49 #include "draw/draw_context.h"
  50 #include "tgsi/tgsi_dump.h"
  51 #include "tgsi/tgsi_emulate.h"
  52 #include "tgsi/tgsi_parse.h"
  53 #include "tgsi/tgsi_ureg.h"
  54
  55 #include "st_debug.h"
  56 #include "st_cb_bitmap.h"
  57 #include "st_cb_drawpixels.h"
  58 #include "st_context.h"
  59 #include "st_tgsi_lower_depth_clamp.h"
  60 #include "st_tgsi_lower_yuv.h"
  61 #include "st_program.h"
  62 #include "st_mesa_to_tgsi.h"
  63 #include "st_atifs_to_tgsi.h"
  64 #include "st_nir.h"
  65 #include "st_shader_cache.h"
  66 #include "st_util.h"
  67 #include "cso_cache/cso_context.h"
  68
  69
  70
  71 static void
  72 set_affected_state_flags(uint64_t *states,
  73                          struct gl_program *prog,
  74                          uint64_t new_constants,
  75                          uint64_t new_sampler_views,
  76                          uint64_t new_samplers,
  77                          uint64_t new_images,
  78                          uint64_t new_ubos,
  79                          uint64_t new_ssbos,
  80                          uint64_t new_atomics)
  81 {
  82    if (prog->Parameters->NumParameters)
  83       *states |= new_constants;
  84
  85    if (prog->info.num_textures)
  86       *states |= new_sampler_views | new_samplers;
  87
  88    if (prog->info.num_images)
  89       *states |= new_images;
  90
  91    if (prog->info.num_ubos)
  92       *states |= new_ubos;
  93
  94    if (prog->info.num_ssbos)
  95       *states |= new_ssbos;
  96
  97    if (prog->info.num_abos)
  98       *states |= new_atomics;
  99 }
 100
 101 /**
 102  * This determines which states will be updated when the shader is bound.
 103  */
 104 void
 105 st_set_prog_affected_state_flags(struct gl_program *prog)
 106 {
 107    uint64_t *states;
 108
 109    switch (prog->info.stage) {
 110    case MESA_SHADER_VERTEX:
 111       states = &((struct st_program*)prog)->affected_states;
 112
 113       *states = ST_NEW_VS_STATE |
 114                 ST_NEW_RASTERIZER |
 115                 ST_NEW_VERTEX_ARRAYS;
 116
 117       set_affected_state_flags(states, prog,
 118                                ST_NEW_VS_CONSTANTS,
 119                                ST_NEW_VS_SAMPLER_VIEWS,
 120                                ST_NEW_VS_SAMPLERS,
 121                                ST_NEW_VS_IMAGES,
 122                                ST_NEW_VS_UBOS,
 123                                ST_NEW_VS_SSBOS,
 124                                ST_NEW_VS_ATOMICS);
 125       break;
 126
 127    case MESA_SHADER_TESS_CTRL:
 128       states = &(st_program(prog))->affected_states;
 129
 130       *states = ST_NEW_TCS_STATE;
 131
 132       set_affected_state_flags(states, prog,
 133                                ST_NEW_TCS_CONSTANTS,
 134                                ST_NEW_TCS_SAMPLER_VIEWS,
 135                                ST_NEW_TCS_SAMPLERS,
 136                                ST_NEW_TCS_IMAGES,
 137                                ST_NEW_TCS_UBOS,
 138                                ST_NEW_TCS_SSBOS,
 139                                ST_NEW_TCS_ATOMICS);
 140       break;
 141
 142    case MESA_SHADER_TESS_EVAL:
 143       states = &(st_program(prog))->affected_states;
 144
 145       *states = ST_NEW_TES_STATE |
 146                 ST_NEW_RASTERIZER;
 147
 148       set_affected_state_flags(states, prog,
 149                                ST_NEW_TES_CONSTANTS,
 150                                ST_NEW_TES_SAMPLER_VIEWS,
 151                                ST_NEW_TES_SAMPLERS,
 152                                ST_NEW_TES_IMAGES,
 153                                ST_NEW_TES_UBOS,
 154                                ST_NEW_TES_SSBOS,
 155                                ST_NEW_TES_ATOMICS);
 156       break;
 157
 158    case MESA_SHADER_GEOMETRY:
 159       states = &(st_program(prog))->affected_states;
 160
 161       *states = ST_NEW_GS_STATE |
 162                 ST_NEW_RASTERIZER;
 163
 164       set_affected_state_flags(states, prog,
 165                                ST_NEW_GS_CONSTANTS,
 166                                ST_NEW_GS_SAMPLER_VIEWS,
 167                                ST_NEW_GS_SAMPLERS,
 168                                ST_NEW_GS_IMAGES,
 169                                ST_NEW_GS_UBOS,
 170                                ST_NEW_GS_SSBOS,
 171                                ST_NEW_GS_ATOMICS);
 172       break;
 173
 174    case MESA_SHADER_FRAGMENT:
 175       states = &((struct st_program*)prog)->affected_states;
 176
 177       /* gl_FragCoord and glDrawPixels always use constants. */
 178       *states = ST_NEW_FS_STATE |
 179                 ST_NEW_SAMPLE_SHADING |
 180                 ST_NEW_FS_CONSTANTS;
 181
 182       set_affected_state_flags(states, prog,
 183                                ST_NEW_FS_CONSTANTS,
 184                                ST_NEW_FS_SAMPLER_VIEWS,
 185                                ST_NEW_FS_SAMPLERS,
 186                                ST_NEW_FS_IMAGES,
 187                                ST_NEW_FS_UBOS,
 188                                ST_NEW_FS_SSBOS,
 189                                ST_NEW_FS_ATOMICS);
 190       break;
 191
 192    case MESA_SHADER_COMPUTE:
 193       states = &((struct st_program*)prog)->affected_states;
 194
 195       *states = ST_NEW_CS_STATE;
 196
 197       set_affected_state_flags(states, prog,
 198                                ST_NEW_CS_CONSTANTS,
 199                                ST_NEW_CS_SAMPLER_VIEWS,
 200                                ST_NEW_CS_SAMPLERS,
 201                                ST_NEW_CS_IMAGES,
 202                                ST_NEW_CS_UBOS,
 203                                ST_NEW_CS_SSBOS,
 204                                ST_NEW_CS_ATOMICS);
 205       break;
 206
 207    default:
 208       unreachable("unhandled shader stage");
 209    }
 210 }
 211
 212
 213 /**
 214  * Delete a shader variant.  Note the caller must unlink the variant from
 215  * the linked list.
 216  */
 217 static void
 218 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
 219 {
 220    if (v->driver_shader) {
 221       if (st->has_shareable_shaders || v->st == st) {
 222          /* The shader's context matches the calling context, or we
 223           * don't care.
 224           */
 225          switch (target) {
 226          case GL_VERTEX_PROGRAM_ARB:
 227             cso_delete_vertex_shader(st->cso_context, v->driver_shader);
 228             break;
 229          case GL_TESS_CONTROL_PROGRAM_NV:
 230             cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
 231             break;
 232          case GL_TESS_EVALUATION_PROGRAM_NV:
 233             cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
 234             break;
 235          case GL_GEOMETRY_PROGRAM_NV:
 236             cso_delete_geometry_shader(st->cso_context, v->driver_shader);
 237             break;
 238          case GL_FRAGMENT_PROGRAM_ARB:
 239             cso_delete_fragment_shader(st->cso_context, v->driver_shader);
 240             break;
 241          case GL_COMPUTE_PROGRAM_NV:
 242             cso_delete_compute_shader(st->cso_context, v->driver_shader);
 243             break;
 244          default:
 245             unreachable("bad shader type in delete_basic_variant");
 246          }
 247       } else {
 248          /* We can't delete a shader with a context different from the one
 249           * that created it.  Add it to the creating context's zombie list.
 250           */
 251          enum pipe_shader_type type =
 252             pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
 253
 254          st_save_zombie_shader(v->st, type, v->driver_shader);
 255       }
 256    }
 257
 258    free(v);
 259 }
 260
 261
 262 /**
 263  * Free all basic program variants.
 264  */
 265 void
 266 st_release_variants(struct st_context *st, struct st_program *p)
 267 {
 268    struct st_variant *v;
 269
 270    for (v = p->variants; v; ) {
 271       struct st_variant *next = v->next;
 272       delete_variant(st, v, p->Base.Target);
 273       v = next;
 274    }
 275
 276    p->variants = NULL;
 277
 278    if (p->state.tokens) {
 279       ureg_free_tokens(p->state.tokens);
 280       p->state.tokens = NULL;
 281    }
 282
 283    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 284     * it has resulted in the driver taking ownership of the NIR.  Those
 285     * callers should be NULLing out the nir field in any pipe_shader_state
 286     * that might have this called in order to indicate that.
 287     *
 288     * GLSL IR and ARB programs will have set gl_program->nir to the same
 289     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 290     */
 291 }
 292
 293 void
 294 st_finalize_nir_before_variants(struct nir_shader *nir)
 295 {
 296    NIR_PASS_V(nir, nir_opt_access);
 297
 298    NIR_PASS_V(nir, nir_split_var_copies);
 299    NIR_PASS_V(nir, nir_lower_var_copies);
 300    if (nir->options->lower_all_io_to_temps ||
 301        nir->options->lower_all_io_to_elements ||
 302        nir->info.stage == MESA_SHADER_VERTEX ||
 303        nir->info.stage == MESA_SHADER_GEOMETRY) {
 304       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 305    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 306       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 307    }
 308
 309    st_nir_assign_vs_in_locations(nir);
 310 }
 311
 312 /**
 313  * Translate ARB (asm) program to NIR
 314  */
 315 static nir_shader *
 316 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 317                          gl_shader_stage stage)
 318 {
 319    struct pipe_screen *screen = st->pipe->screen;
 320    const struct gl_shader_compiler_options *options =
 321       &st->ctx->Const.ShaderCompilerOptions[stage];
 322
 323    /* Translate to NIR */
 324    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 325    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 326    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 327
 328    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
 329    NIR_PASS_V(nir, nir_lower_system_values);
 330
 331    /* Optimise NIR */
 332    NIR_PASS_V(nir, nir_opt_constant_folding);
 333    st_nir_opts(nir);
 334    st_finalize_nir_before_variants(nir);
 335
 336    if (st->allow_st_finalize_nir_twice)
 337       st_finalize_nir(st, prog, NULL, nir, true);
 338
 339    nir_validate_shader(nir, "after st/glsl finalize_nir");
 340
 341    return nir;
 342 }
 343
 344 void
 345 st_prepare_vertex_program(struct st_program *stp)
 346 {
 347    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 348
 349    stvp->num_inputs = 0;
 350    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 351    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 352
 353    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 354     * and TGSI generic input indexes, plus input attrib semantic info.
 355     */
 356    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 357       if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 358          stvp->input_to_index[attr] = stvp->num_inputs;
 359          stvp->index_to_input[stvp->num_inputs] = attr;
 360          stvp->num_inputs++;
 361
 362          if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 363             /* add placeholder for second part of a double attribute */
 364             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 365             stvp->num_inputs++;
 366          }
 367       }
 368    }
 369    /* pre-setup potentially unused edgeflag input */
 370    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 371    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 372
 373    /* Compute mapping of vertex program outputs to slots. */
 374    unsigned num_outputs = 0;
 375    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 376       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 377          stvp->result_to_output[attr] = num_outputs++;
 378    }
 379    /* pre-setup potentially unused edgeflag output */
 380    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 381 }
 382
 383 void
 384 st_translate_stream_output_info(struct gl_program *prog)
 385 {
 386    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 387    if (!info)
 388       return;
 389
 390    /* Determine the (default) output register mapping for each output. */
 391    unsigned num_outputs = 0;
 392    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 393    memset(output_mapping, 0, sizeof(output_mapping));
 394
 395    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 396       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 397          output_mapping[attr] = num_outputs++;
 398    }
 399
 400    /* Translate stream output info. */
 401    struct pipe_stream_output_info *so_info =
 402       &((struct st_program*)prog)->state.stream_output;
 403
 404    for (unsigned i = 0; i < info->NumOutputs; i++) {
 405       so_info->output[i].register_index =
 406          output_mapping[info->Outputs[i].OutputRegister];
 407       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 408       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 409       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 410       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 411       so_info->output[i].stream = info->Outputs[i].StreamId;
 412    }
 413
 414    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 415       so_info->stride[i] = info->Buffers[i].Stride;
 416    }
 417    so_info->num_outputs = info->NumOutputs;
 418 }
 419
 420 /**
 421  * Translate a vertex program.
 422  */
 423 bool
 424 st_translate_vertex_program(struct st_context *st,
 425                             struct st_program *stp)
 426 {
 427    struct ureg_program *ureg;
 428    enum pipe_error error;
 429    unsigned num_outputs = 0;
 430    unsigned attr;
 431    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 432    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 433
 434    if (stp->Base.arb.IsPositionInvariant)
 435       _mesa_insert_mvp_code(st->ctx, &stp->Base);
 436
 437    st_prepare_vertex_program(stp);
 438
 439    /* ARB_vp: */
 440    if (!stp->glsl_to_tgsi) {
 441       _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);
 442
 443       /* This determines which states will be updated when the assembly
 444        * shader is bound.
 445        */
 446       stp->affected_states = ST_NEW_VS_STATE |
 447                               ST_NEW_RASTERIZER |
 448                               ST_NEW_VERTEX_ARRAYS;
 449
 450       if (stp->Base.Parameters->NumParameters)
 451          stp->affected_states |= ST_NEW_VS_CONSTANTS;
 452
 453       /* Translate to NIR if preferred. */
 454       if (st->pipe->screen->get_shader_param(st->pipe->screen,
 455                                              PIPE_SHADER_VERTEX,
 456                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 457          assert(!stp->glsl_to_tgsi);
 458
 459          if (stp->Base.nir)
 460             ralloc_free(stp->Base.nir);
 461
 462          stp->state.type = PIPE_SHADER_IR_NIR;
 463          stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
 464                                                   MESA_SHADER_VERTEX);
 465          /* For st_draw_feedback, we need to generate TGSI too if draw doesn't
 466           * use LLVM.
 467           */
 468          if (draw_has_llvm())
 469             return true;
 470       }
 471    }
 472
 473    /* Get semantic names and indices. */
 474    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 475       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 476          unsigned slot = num_outputs++;
 477          unsigned semantic_name, semantic_index;
 478          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 479                                       &semantic_name, &semantic_index);
 480          output_semantic_name[slot] = semantic_name;
 481          output_semantic_index[slot] = semantic_index;
 482       }
 483    }
 484    /* pre-setup potentially unused edgeflag output */
 485    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 486    output_semantic_index[num_outputs] = 0;
 487
 488    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 489    if (ureg == NULL)
 490       return false;
 491
 492    if (stp->Base.info.clip_distance_array_size)
 493       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 494                     stp->Base.info.clip_distance_array_size);
 495    if (stp->Base.info.cull_distance_array_size)
 496       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 497                     stp->Base.info.cull_distance_array_size);
 498
 499    if (ST_DEBUG & DEBUG_MESA) {
 500       _mesa_print_program(&stp->Base);
 501       _mesa_print_program_parameters(st->ctx, &stp->Base);
 502       debug_printf("\n");
 503    }
 504
 505    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 506
 507    if (stp->glsl_to_tgsi) {
 508       error = st_translate_program(st->ctx,
 509                                    PIPE_SHADER_VERTEX,
 510                                    ureg,
 511                                    stp->glsl_to_tgsi,
 512                                    &stp->Base,
 513                                    /* inputs */
 514                                    stvp->num_inputs,
 515                                    stvp->input_to_index,
 516                                    NULL, /* inputSlotToAttr */
 517                                    NULL, /* input semantic name */
 518                                    NULL, /* input semantic index */
 519                                    NULL, /* interp mode */
 520                                    /* outputs */
 521                                    num_outputs,
 522                                    stvp->result_to_output,
 523                                    output_semantic_name,
 524                                    output_semantic_index);
 525
 526       st_translate_stream_output_info(&stp->Base);
 527
 528       free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
 529    } else
 530       error = st_translate_mesa_program(st->ctx,
 531                                         PIPE_SHADER_VERTEX,
 532                                         ureg,
 533                                         &stp->Base,
 534                                         /* inputs */
 535                                         stvp->num_inputs,
 536                                         stvp->input_to_index,
 537                                         NULL, /* input semantic name */
 538                                         NULL, /* input semantic index */
 539                                         NULL,
 540                                         /* outputs */
 541                                         num_outputs,
 542                                         stvp->result_to_output,
 543                                         output_semantic_name,
 544                                         output_semantic_index);
 545
 546    if (error) {
 547       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 548       _mesa_print_program(&stp->Base);
 549       debug_assert(0);
 550       return false;
 551    }
 552
 553    stp->state.tokens = ureg_get_tokens(ureg, NULL);
 554    ureg_destroy(ureg);
 555
 556    if (stp->glsl_to_tgsi) {
 557       stp->glsl_to_tgsi = NULL;
 558       st_store_ir_in_disk_cache(st, &stp->Base, false);
 559    }
 560
 561    return stp->state.tokens != NULL;
 562 }
 563
 564 static const gl_state_index16 depth_range_state[STATE_LENGTH] =
 565    { STATE_DEPTH_RANGE };
 566
 567 static struct st_common_variant *
 568 st_create_vp_variant(struct st_context *st,
 569                      struct st_program *stvp,
 570                      const struct st_common_variant_key *key)
 571 {
 572    struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
 573    struct pipe_context *pipe = st->pipe;
 574    struct pipe_screen *screen = pipe->screen;
 575    struct pipe_shader_state state = {0};
 576
 577    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 578       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 579    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 580
 581    vpv->key = *key;
 582
 583    state.stream_output = stvp->state.stream_output;
 584
 585    if (stvp->state.type == PIPE_SHADER_IR_NIR &&
 586        (!key->is_draw_shader || draw_has_llvm())) {
 587       bool finalize = false;
 588
 589       state.type = PIPE_SHADER_IR_NIR;
 590       state.ir.nir = nir_shader_clone(NULL, stvp->Base.nir);
 591       if (key->clamp_color) {
 592          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 593          finalize = true;
 594       }
 595       if (key->passthrough_edgeflags) {
 596          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 597          finalize = true;
 598       }
 599
 600       if (key->lower_point_size) {
 601          _mesa_add_state_reference(params, point_size_state);
 602          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 603                     point_size_state);
 604          finalize = true;
 605       }
 606
 607       if (key->lower_ucp) {
 608          bool can_compact = screen->get_param(screen,
 609                                               PIPE_CAP_NIR_COMPACT_ARRAYS);
 610
 611          bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 612          gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 613          for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 614             if (use_eye) {
 615                clipplane_state[i][0] = STATE_CLIPPLANE;
 616                clipplane_state[i][1] = i;
 617             } else {
 618                clipplane_state[i][0] = STATE_INTERNAL;
 619                clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 620                clipplane_state[i][2] = i;
 621             }
 622             _mesa_add_state_reference(params, clipplane_state[i]);
 623          }
 624
 625          NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
 626                     true, can_compact, clipplane_state);
 627          NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
 628                     nir_shader_get_entrypoint(state.ir.nir), true, false);
 629          NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
 630          finalize = true;
 631       }
 632
 633       if (finalize || !st->allow_st_finalize_nir_twice) {
 634          st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
 635                          true);
 636
 637          /* Some of the lowering above may have introduced new varyings */
 638          nir_shader_gather_info(state.ir.nir,
 639                                 nir_shader_get_entrypoint(state.ir.nir));
 640       }
 641
 642       if (ST_DEBUG & DEBUG_PRINT_IR)
 643          nir_print_shader(state.ir.nir, stderr);
 644
 645       if (key->is_draw_shader)
 646          vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 647       else
 648          vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 649
 650       return vpv;
 651    }
 652
 653    state.type = PIPE_SHADER_IR_TGSI;
 654    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 655
 656    /* Emulate features. */
 657    if (key->clamp_color || key->passthrough_edgeflags) {
 658       const struct tgsi_token *tokens;
 659       unsigned flags =
 660          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 661          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 662
 663       tokens = tgsi_emulate(state.tokens, flags);
 664
 665       if (tokens) {
 666          tgsi_free_tokens(state.tokens);
 667          state.tokens = tokens;
 668       } else {
 669          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 670       }
 671    }
 672
 673    if (key->lower_depth_clamp) {
 674       unsigned depth_range_const =
 675             _mesa_add_state_reference(params, depth_range_state);
 676
 677       const struct tgsi_token *tokens;
 678       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 679                                          key->clip_negative_one_to_one);
 680       if (tokens != state.tokens)
 681          tgsi_free_tokens(state.tokens);
 682       state.tokens = tokens;
 683    }
 684
 685    if (ST_DEBUG & DEBUG_PRINT_IR)
 686       tgsi_dump(state.tokens, 0);
 687
 688    if (key->is_draw_shader)
 689       vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 690    else
 691       vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 692
 693    return vpv;
 694 }
 695
 696
 697 /**
 698  * Find/create a vertex program variant.
 699  */
 700 struct st_common_variant *
 701 st_get_vp_variant(struct st_context *st,
 702                   struct st_program *stp,
 703                   const struct st_common_variant_key *key)
 704 {
 705    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 706    struct st_common_variant *vpv;
 707
 708    /* Search for existing variant */
 709    for (vpv = st_common_variant(stp->variants); vpv;
 710         vpv = st_common_variant(vpv->base.next)) {
 711       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 712          break;
 713       }
 714    }
 715
 716    if (!vpv) {
 717       /* create now */
 718       vpv = st_create_vp_variant(st, stp, key);
 719       if (vpv) {
 720          vpv->base.st = key->st;
 721
 722          unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
 723          for (unsigned index = 0; index < num_inputs; ++index) {
 724             unsigned attr = stvp->index_to_input[index];
 725             if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 726                continue;
 727             vpv->vert_attrib_mask |= 1u << attr;
 728          }
 729
 730          /* insert into list */
 731          vpv->base.next = stp->variants;
 732          stp->variants = &vpv->base;
 733       }
 734    }
 735
 736    return vpv;
 737 }
 738
 739
 740 /**
 741  * Translate a Mesa fragment shader into a TGSI shader.
 742  */
 743 bool
 744 st_translate_fragment_program(struct st_context *st,
 745                               struct st_program *stfp)
 746 {
 747    /* Non-GLSL programs: */
 748    if (!stfp->glsl_to_tgsi) {
 749       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 750       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 751          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 752
 753       /* This determines which states will be updated when the assembly
 754        * shader is bound.
 755        *
 756        * fragment.position and glDrawPixels always use constants.
 757        */
 758       stfp->affected_states = ST_NEW_FS_STATE |
 759                               ST_NEW_SAMPLE_SHADING |
 760                               ST_NEW_FS_CONSTANTS;
 761
 762       if (stfp->ati_fs) {
 763          /* Just set them for ATI_fs unconditionally. */
 764          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 765                                   ST_NEW_FS_SAMPLERS;
 766       } else {
 767          /* ARB_fp */
 768          if (stfp->Base.SamplersUsed)
 769             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 770                                      ST_NEW_FS_SAMPLERS;
 771       }
 772
 773       /* Translate to NIR. */
 774       if (!stfp->ati_fs &&
 775           st->pipe->screen->get_shader_param(st->pipe->screen,
 776                                              PIPE_SHADER_FRAGMENT,
 777                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 778          nir_shader *nir =
 779             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 780
 781          if (stfp->Base.nir)
 782             ralloc_free(stfp->Base.nir);
 783          stfp->state.type = PIPE_SHADER_IR_NIR;
 784          stfp->Base.nir = nir;
 785          return true;
 786       }
 787    }
 788
 789    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 790    ubyte inputMapping[VARYING_SLOT_MAX];
 791    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 792    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 793    GLuint attr;
 794    GLbitfield64 inputsRead;
 795    struct ureg_program *ureg;
 796
 797    GLboolean write_all = GL_FALSE;
 798
 799    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 800    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 801    uint fs_num_inputs = 0;
 802
 803    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 804    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 805    uint fs_num_outputs = 0;
 806
 807    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 808
 809    /*
 810     * Convert Mesa program inputs to TGSI input register semantics.
 811     */
 812    inputsRead = stfp->Base.info.inputs_read;
 813    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 814       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 815          const GLuint slot = fs_num_inputs++;
 816
 817          inputMapping[attr] = slot;
 818          inputSlotToAttr[slot] = attr;
 819
 820          switch (attr) {
 821          case VARYING_SLOT_POS:
 822             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 823             input_semantic_index[slot] = 0;
 824             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 825             break;
 826          case VARYING_SLOT_COL0:
 827             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 828             input_semantic_index[slot] = 0;
 829             interpMode[slot] = stfp->glsl_to_tgsi ?
 830                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 831             break;
 832          case VARYING_SLOT_COL1:
 833             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 834             input_semantic_index[slot] = 1;
 835             interpMode[slot] = stfp->glsl_to_tgsi ?
 836                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 837             break;
 838          case VARYING_SLOT_FOGC:
 839             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 840             input_semantic_index[slot] = 0;
 841             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 842             break;
 843          case VARYING_SLOT_FACE:
 844             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 845             input_semantic_index[slot] = 0;
 846             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 847             break;
 848          case VARYING_SLOT_PRIMITIVE_ID:
 849             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 850             input_semantic_index[slot] = 0;
 851             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 852             break;
 853          case VARYING_SLOT_LAYER:
 854             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 855             input_semantic_index[slot] = 0;
 856             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 857             break;
 858          case VARYING_SLOT_VIEWPORT:
 859             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 860             input_semantic_index[slot] = 0;
 861             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 862             break;
 863          case VARYING_SLOT_CLIP_DIST0:
 864             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 865             input_semantic_index[slot] = 0;
 866             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 867             break;
 868          case VARYING_SLOT_CLIP_DIST1:
 869             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 870             input_semantic_index[slot] = 1;
 871             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 872             break;
 873          case VARYING_SLOT_CULL_DIST0:
 874          case VARYING_SLOT_CULL_DIST1:
 875             /* these should have been lowered by GLSL */
 876             assert(0);
 877             break;
 878             /* In most cases, there is nothing special about these
 879              * inputs, so adopt a convention to use the generic
 880              * semantic name and the mesa VARYING_SLOT_ number as the
 881              * index.
 882              *
 883              * All that is required is that the vertex shader labels
 884              * its own outputs similarly, and that the vertex shader
 885              * generates at least every output required by the
 886              * fragment shader plus fixed-function hardware (such as
 887              * BFC).
 888              *
 889              * However, some drivers may need us to identify the PNTC and TEXi
 890              * varyings if, for example, their capability to replace them with
 891              * sprite coordinates is limited.
 892              */
 893          case VARYING_SLOT_PNTC:
 894             if (st->needs_texcoord_semantic) {
 895                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
 896                input_semantic_index[slot] = 0;
 897                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 898                break;
 899             }
 900             /* fall through */
 901          case VARYING_SLOT_TEX0:
 902          case VARYING_SLOT_TEX1:
 903          case VARYING_SLOT_TEX2:
 904          case VARYING_SLOT_TEX3:
 905          case VARYING_SLOT_TEX4:
 906          case VARYING_SLOT_TEX5:
 907          case VARYING_SLOT_TEX6:
 908          case VARYING_SLOT_TEX7:
 909             if (st->needs_texcoord_semantic) {
 910                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
 911                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
 912                interpMode[slot] = stfp->glsl_to_tgsi ?
 913                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 914                break;
 915             }
 916             /* fall through */
 917          case VARYING_SLOT_VAR0:
 918          default:
 919             /* Semantic indices should be zero-based because drivers may choose
 920              * to assign a fixed slot determined by that index.
 921              * This is useful because ARB_separate_shader_objects uses location
 922              * qualifiers for linkage, and if the semantic index corresponds to
 923              * these locations, linkage passes in the driver become unecessary.
 924              *
 925              * If needs_texcoord_semantic is true, no semantic indices will be
 926              * consumed for the TEXi varyings, and we can base the locations of
 927              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
 928              */
 929             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
 930                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
 931             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
 932             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
 933             if (attr == VARYING_SLOT_PNTC)
 934                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 935             else {
 936                interpMode[slot] = stfp->glsl_to_tgsi ?
 937                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 938             }
 939             break;
 940          }
 941       }
 942       else {
 943          inputMapping[attr] = -1;
 944       }
 945    }
 946
 947    /*
 948     * Semantics and mapping for outputs
 949     */
 950    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
 951
 952    /* if z is written, emit that first */
 953    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
 954       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
 955       fs_output_semantic_index[fs_num_outputs] = 0;
 956       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
 957       fs_num_outputs++;
 958       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
 959    }
 960
 961    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
 962       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
 963       fs_output_semantic_index[fs_num_outputs] = 0;
 964       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
 965       fs_num_outputs++;
 966       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
 967    }
 968
 969    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
 970       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
 971       fs_output_semantic_index[fs_num_outputs] = 0;
 972       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
 973       fs_num_outputs++;
 974       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
 975    }
 976
 977    /* handle remaining outputs (color) */
 978    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
 979       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
 980          stfp->Base.SecondaryOutputsWritten;
 981       const unsigned loc = attr % FRAG_RESULT_MAX;
 982
 983       if (written & BITFIELD64_BIT(loc)) {
 984          switch (loc) {
 985          case FRAG_RESULT_DEPTH:
 986          case FRAG_RESULT_STENCIL:
 987          case FRAG_RESULT_SAMPLE_MASK:
 988             /* handled above */
 989             assert(0);
 990             break;
 991          case FRAG_RESULT_COLOR:
 992             write_all = GL_TRUE; /* fallthrough */
 993          default: {
 994             int index;
 995             assert(loc == FRAG_RESULT_COLOR ||
 996                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
 997
 998             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
 999
1000             if (attr >= FRAG_RESULT_MAX) {
1001                /* Secondary color for dual source blending. */
1002                assert(index == 0);
1003                index++;
1004             }
1005
1006             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1007             fs_output_semantic_index[fs_num_outputs] = index;
1008             outputMapping[attr] = fs_num_outputs;
1009             break;
1010          }
1011          }
1012
1013          fs_num_outputs++;
1014       }
1015    }
1016
1017    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1018    if (ureg == NULL)
1019       return false;
1020
1021    if (ST_DEBUG & DEBUG_MESA) {
1022       _mesa_print_program(&stfp->Base);
1023       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1024       debug_printf("\n");
1025    }
1026    if (write_all == GL_TRUE)
1027       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1028
1029    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1030       switch (stfp->Base.info.fs.depth_layout) {
1031       case FRAG_DEPTH_LAYOUT_ANY:
1032          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1033                        TGSI_FS_DEPTH_LAYOUT_ANY);
1034          break;
1035       case FRAG_DEPTH_LAYOUT_GREATER:
1036          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1037                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1038          break;
1039       case FRAG_DEPTH_LAYOUT_LESS:
1040          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1041                        TGSI_FS_DEPTH_LAYOUT_LESS);
1042          break;
1043       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1044          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1045                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1046          break;
1047       default:
1048          assert(0);
1049       }
1050    }
1051
1052    if (stfp->glsl_to_tgsi) {
1053       st_translate_program(st->ctx,
1054                            PIPE_SHADER_FRAGMENT,
1055                            ureg,
1056                            stfp->glsl_to_tgsi,
1057                            &stfp->Base,
1058                            /* inputs */
1059                            fs_num_inputs,
1060                            inputMapping,
1061                            inputSlotToAttr,
1062                            input_semantic_name,
1063                            input_semantic_index,
1064                            interpMode,
1065                            /* outputs */
1066                            fs_num_outputs,
1067                            outputMapping,
1068                            fs_output_semantic_name,
1069                            fs_output_semantic_index);
1070
1071       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1072    } else if (stfp->ati_fs)
1073       st_translate_atifs_program(ureg,
1074                                  stfp->ati_fs,
1075                                  &stfp->Base,
1076                                  /* inputs */
1077                                  fs_num_inputs,
1078                                  inputMapping,
1079                                  input_semantic_name,
1080                                  input_semantic_index,
1081                                  interpMode,
1082                                  /* outputs */
1083                                  fs_num_outputs,
1084                                  outputMapping,
1085                                  fs_output_semantic_name,
1086                                  fs_output_semantic_index);
1087    else
1088       st_translate_mesa_program(st->ctx,
1089                                 PIPE_SHADER_FRAGMENT,
1090                                 ureg,
1091                                 &stfp->Base,
1092                                 /* inputs */
1093                                 fs_num_inputs,
1094                                 inputMapping,
1095                                 input_semantic_name,
1096                                 input_semantic_index,
1097                                 interpMode,
1098                                 /* outputs */
1099                                 fs_num_outputs,
1100                                 outputMapping,
1101                                 fs_output_semantic_name,
1102                                 fs_output_semantic_index);
1103
1104    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1105    ureg_destroy(ureg);
1106
1107    if (stfp->glsl_to_tgsi) {
1108       stfp->glsl_to_tgsi = NULL;
1109       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1110    }
1111
1112    return stfp->state.tokens != NULL;
1113 }
1114
1115 static struct st_fp_variant *
1116 st_create_fp_variant(struct st_context *st,
1117                      struct st_program *stfp,
1118                      const struct st_fp_variant_key *key)
1119 {
1120    struct pipe_context *pipe = st->pipe;
1121    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1122    struct pipe_shader_state state = {0};
1123    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1124    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1125       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1126    static const gl_state_index16 scale_state[STATE_LENGTH] =
1127       { STATE_INTERNAL, STATE_PT_SCALE };
1128    static const gl_state_index16 bias_state[STATE_LENGTH] =
1129       { STATE_INTERNAL, STATE_PT_BIAS };
1130    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1131       { STATE_INTERNAL, STATE_ALPHA_REF };
1132
1133    if (!variant)
1134       return NULL;
1135
1136    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1137       bool finalize = false;
1138
1139       state.type = PIPE_SHADER_IR_NIR;
1140       state.ir.nir = nir_shader_clone(NULL, stfp->Base.nir);
1141
1142       if (key->clamp_color) {
1143          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1144          finalize = true;
1145       }
1146
1147       if (key->lower_flatshade) {
1148          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1149          finalize = true;
1150       }
1151
1152       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1153          _mesa_add_state_reference(params, alpha_ref_state);
1154          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1155                     false, alpha_ref_state);
1156          finalize = true;
1157       }
1158
1159       if (key->lower_two_sided_color) {
1160          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
1161          finalize = true;
1162       }
1163
1164       if (key->persample_shading) {
1165           nir_shader *shader = state.ir.nir;
1166           nir_foreach_variable(var, &shader->inputs)
1167              var->data.sample = true;
1168           finalize = true;
1169       }
1170
1171       assert(!(key->bitmap && key->drawpixels));
1172
1173       /* glBitmap */
1174       if (key->bitmap) {
1175          nir_lower_bitmap_options options = {0};
1176
1177          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1178          options.sampler = variant->bitmap_sampler;
1179          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1180
1181          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1182          finalize = true;
1183       }
1184
1185       /* glDrawPixels (color only) */
1186       if (key->drawpixels) {
1187          nir_lower_drawpixels_options options = {{0}};
1188          unsigned samplers_used = stfp->Base.SamplersUsed;
1189
1190          /* Find the first unused slot. */
1191          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1192          options.drawpix_sampler = variant->drawpix_sampler;
1193          samplers_used |= (1 << variant->drawpix_sampler);
1194
1195          options.pixel_maps = key->pixelMaps;
1196          if (key->pixelMaps) {
1197             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1198             options.pixelmap_sampler = variant->pixelmap_sampler;
1199          }
1200
1201          options.scale_and_bias = key->scaleAndBias;
1202          if (key->scaleAndBias) {
1203             _mesa_add_state_reference(params, scale_state);
1204             memcpy(options.scale_state_tokens, scale_state,
1205                    sizeof(options.scale_state_tokens));
1206             _mesa_add_state_reference(params, bias_state);
1207             memcpy(options.bias_state_tokens, bias_state,
1208                    sizeof(options.bias_state_tokens));
1209          }
1210
1211          _mesa_add_state_reference(params, texcoord_state);
1212          memcpy(options.texcoord_state_tokens, texcoord_state,
1213                 sizeof(options.texcoord_state_tokens));
1214
1215          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1216          finalize = true;
1217       }
1218
1219       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1220                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1221                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1222          nir_lower_tex_options options = {0};
1223          options.lower_y_uv_external = key->external.lower_nv12;
1224          options.lower_y_u_v_external = key->external.lower_iyuv;
1225          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1226          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1227          options.lower_ayuv_external = key->external.lower_ayuv;
1228          options.lower_xyuv_external = key->external.lower_xyuv;
1229          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1230          finalize = true;
1231       }
1232
1233       if (finalize || !st->allow_st_finalize_nir_twice) {
1234          st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
1235                          false);
1236       }
1237
1238       /* This pass needs to happen *after* nir_lower_sampler */
1239       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1240                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1241          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1242                     ~stfp->Base.SamplersUsed,
1243                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1244                        key->external.lower_yx_xuxv,
1245                     key->external.lower_iyuv);
1246          finalize = true;
1247       }
1248
1249       if (finalize || !st->allow_st_finalize_nir_twice) {
1250          /* Some of the lowering above may have introduced new varyings */
1251          nir_shader_gather_info(state.ir.nir,
1252                                 nir_shader_get_entrypoint(state.ir.nir));
1253
1254          struct pipe_screen *screen = pipe->screen;
1255          if (screen->finalize_nir)
1256             screen->finalize_nir(screen, state.ir.nir, false);
1257       }
1258
1259       if (ST_DEBUG & DEBUG_PRINT_IR)
1260          nir_print_shader(state.ir.nir, stderr);
1261
1262       variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1263       variant->key = *key;
1264
1265       return variant;
1266    }
1267
1268    state.tokens = stfp->state.tokens;
1269
1270    assert(!(key->bitmap && key->drawpixels));
1271
1272    /* Fix texture targets and add fog for ATI_fs */
1273    if (stfp->ati_fs) {
1274       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1275
1276       if (tokens)
1277          state.tokens = tokens;
1278       else
1279          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1280    }
1281
1282    /* Emulate features. */
1283    if (key->clamp_color || key->persample_shading) {
1284       const struct tgsi_token *tokens;
1285       unsigned flags =
1286          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1287          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1288
1289       tokens = tgsi_emulate(state.tokens, flags);
1290
1291       if (tokens) {
1292          if (state.tokens != stfp->state.tokens)
1293             tgsi_free_tokens(state.tokens);
1294          state.tokens = tokens;
1295       } else
1296          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1297    }
1298
1299    /* glBitmap */
1300    if (key->bitmap) {
1301       const struct tgsi_token *tokens;
1302
1303       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1304
1305       tokens = st_get_bitmap_shader(state.tokens,
1306                                     st->internal_target,
1307                                     variant->bitmap_sampler,
1308                                     st->needs_texcoord_semantic,
1309                                     st->bitmap.tex_format ==
1310                                     PIPE_FORMAT_R8_UNORM);
1311
1312       if (tokens) {
1313          if (state.tokens != stfp->state.tokens)
1314             tgsi_free_tokens(state.tokens);
1315          state.tokens = tokens;
1316       } else
1317          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1318    }
1319
1320    /* glDrawPixels (color only) */
1321    if (key->drawpixels) {
1322       const struct tgsi_token *tokens;
1323       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1324
1325       /* Find the first unused slot. */
1326       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1327
1328       if (key->pixelMaps) {
1329          unsigned samplers_used = stfp->Base.SamplersUsed |
1330                                   (1 << variant->drawpix_sampler);
1331
1332          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1333       }
1334
1335       if (key->scaleAndBias) {
1336          scale_const = _mesa_add_state_reference(params, scale_state);
1337          bias_const = _mesa_add_state_reference(params, bias_state);
1338       }
1339
1340       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1341
1342       tokens = st_get_drawpix_shader(state.tokens,
1343                                      st->needs_texcoord_semantic,
1344                                      key->scaleAndBias, scale_const,
1345                                      bias_const, key->pixelMaps,
1346                                      variant->drawpix_sampler,
1347                                      variant->pixelmap_sampler,
1348                                      texcoord_const, st->internal_target);
1349
1350       if (tokens) {
1351          if (state.tokens != stfp->state.tokens)
1352             tgsi_free_tokens(state.tokens);
1353          state.tokens = tokens;
1354       } else
1355          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1356    }
1357
1358    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1359                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1360       const struct tgsi_token *tokens;
1361
1362       /* samplers inserted would conflict, but this should be unpossible: */
1363       assert(!(key->bitmap || key->drawpixels));
1364
1365       tokens = st_tgsi_lower_yuv(state.tokens,
1366                                  ~stfp->Base.SamplersUsed,
1367                                  key->external.lower_nv12 ||
1368                                     key->external.lower_xy_uxvx ||
1369                                     key->external.lower_yx_xuxv,
1370                                  key->external.lower_iyuv);
1371       if (tokens) {
1372          if (state.tokens != stfp->state.tokens)
1373             tgsi_free_tokens(state.tokens);
1374          state.tokens = tokens;
1375       } else {
1376          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1377       }
1378    }
1379
1380    if (key->lower_depth_clamp) {
1381       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1382
1383       const struct tgsi_token *tokens;
1384       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1385       if (state.tokens != stfp->state.tokens)
1386          tgsi_free_tokens(state.tokens);
1387       state.tokens = tokens;
1388    }
1389
1390    if (ST_DEBUG & DEBUG_PRINT_IR)
1391       tgsi_dump(state.tokens, 0);
1392
1393    /* fill in variant */
1394    variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1395    variant->key = *key;
1396
1397    if (state.tokens != stfp->state.tokens)
1398       tgsi_free_tokens(state.tokens);
1399    return variant;
1400 }
1401
1402 /**
1403  * Translate fragment program if needed.
1404  */
1405 struct st_fp_variant *
1406 st_get_fp_variant(struct st_context *st,
1407                   struct st_program *stfp,
1408                   const struct st_fp_variant_key *key)
1409 {
1410    struct st_fp_variant *fpv;
1411
1412    /* Search for existing variant */
1413    for (fpv = st_fp_variant(stfp->variants); fpv;
1414         fpv = st_fp_variant(fpv->base.next)) {
1415       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1416          break;
1417       }
1418    }
1419
1420    if (!fpv) {
1421       /* create new */
1422       fpv = st_create_fp_variant(st, stfp, key);
1423       if (fpv) {
1424          fpv->base.st = key->st;
1425
1426          if (key->bitmap || key->drawpixels) {
1427             /* Regular variants should always come before the
1428              * bitmap & drawpixels variants, (unless there
1429              * are no regular variants) so that
1430              * st_update_fp can take a fast path when
1431              * shader_has_one_variant is set.
1432              */
1433             if (!stfp->variants) {
1434                stfp->variants = &fpv->base;
1435             } else {
1436                /* insert into list after the first one */
1437                fpv->base.next = stfp->variants->next;
1438                stfp->variants->next = &fpv->base;
1439             }
1440          } else {
1441             /* insert into list */
1442             fpv->base.next = stfp->variants;
1443             stfp->variants = &fpv->base;
1444          }
1445       }
1446    }
1447
1448    return fpv;
1449 }
1450
1451 /**
1452  * Translate a program. This is common code for geometry and tessellation
1453  * shaders.
1454  */
1455 bool
1456 st_translate_common_program(struct st_context *st,
1457                             struct st_program *stp)
1458 {
1459    struct gl_program *prog = &stp->Base;
1460    enum pipe_shader_type stage =
1461       pipe_shader_type_from_mesa(stp->Base.info.stage);
1462    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1463
1464    if (ureg == NULL)
1465       return false;
1466
1467    switch (stage) {
1468    case PIPE_SHADER_TESS_CTRL:
1469       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1470                     stp->Base.info.tess.tcs_vertices_out);
1471       break;
1472
1473    case PIPE_SHADER_TESS_EVAL:
1474       if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
1475          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1476       else
1477          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1478                        stp->Base.info.tess.primitive_mode);
1479
1480       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1481       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1482                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1483       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1484                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1485
1486       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1487                     (stp->Base.info.tess.spacing + 1) % 3);
1488
1489       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1490                     !stp->Base.info.tess.ccw);
1491       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1492                     stp->Base.info.tess.point_mode);
1493       break;
1494
1495    case PIPE_SHADER_GEOMETRY:
1496       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1497                     stp->Base.info.gs.input_primitive);
1498       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1499                     stp->Base.info.gs.output_primitive);
1500       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1501                     stp->Base.info.gs.vertices_out);
1502       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1503                     stp->Base.info.gs.invocations);
1504       break;
1505
1506    default:
1507       break;
1508    }
1509
1510    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1511    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1512    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1513    GLuint attr;
1514
1515    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1516    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1517    uint num_inputs = 0;
1518
1519    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1520    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1521    uint num_outputs = 0;
1522
1523    GLint i;
1524
1525    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1526    memset(inputMapping, 0, sizeof(inputMapping));
1527    memset(outputMapping, 0, sizeof(outputMapping));
1528    memset(&stp->state, 0, sizeof(stp->state));
1529
1530    if (prog->info.clip_distance_array_size)
1531       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1532                     prog->info.clip_distance_array_size);
1533    if (prog->info.cull_distance_array_size)
1534       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1535                     prog->info.cull_distance_array_size);
1536
1537    /*
1538     * Convert Mesa program inputs to TGSI input register semantics.
1539     */
1540    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1541       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1542          continue;
1543
1544       unsigned slot = num_inputs++;
1545
1546       inputMapping[attr] = slot;
1547       inputSlotToAttr[slot] = attr;
1548
1549       unsigned semantic_name, semantic_index;
1550       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1551                                    &semantic_name, &semantic_index);
1552       input_semantic_name[slot] = semantic_name;
1553       input_semantic_index[slot] = semantic_index;
1554    }
1555
1556    /* Also add patch inputs. */
1557    for (attr = 0; attr < 32; attr++) {
1558       if (prog->info.patch_inputs_read & (1u << attr)) {
1559          GLuint slot = num_inputs++;
1560          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1561
1562          inputMapping[patch_attr] = slot;
1563          inputSlotToAttr[slot] = patch_attr;
1564          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1565          input_semantic_index[slot] = attr;
1566       }
1567    }
1568
1569    /* initialize output semantics to defaults */
1570    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1571       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1572       output_semantic_index[i] = 0;
1573    }
1574
1575    /*
1576     * Determine number of outputs, the (default) output register
1577     * mapping and the semantic information for each output.
1578     */
1579    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1580       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1581          GLuint slot = num_outputs++;
1582
1583          outputMapping[attr] = slot;
1584
1585          unsigned semantic_name, semantic_index;
1586          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1587                                       &semantic_name, &semantic_index);
1588          output_semantic_name[slot] = semantic_name;
1589          output_semantic_index[slot] = semantic_index;
1590       }
1591    }
1592
1593    /* Also add patch outputs. */
1594    for (attr = 0; attr < 32; attr++) {
1595       if (prog->info.patch_outputs_written & (1u << attr)) {
1596          GLuint slot = num_outputs++;
1597          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1598
1599          outputMapping[patch_attr] = slot;
1600          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1601          output_semantic_index[slot] = attr;
1602       }
1603    }
1604
1605    st_translate_program(st->ctx,
1606                         stage,
1607                         ureg,
1608                         stp->glsl_to_tgsi,
1609                         prog,
1610                         /* inputs */
1611                         num_inputs,
1612                         inputMapping,
1613                         inputSlotToAttr,
1614                         input_semantic_name,
1615                         input_semantic_index,
1616                         NULL,
1617                         /* outputs */
1618                         num_outputs,
1619                         outputMapping,
1620                         output_semantic_name,
1621                         output_semantic_index);
1622
1623    stp->state.tokens = ureg_get_tokens(ureg, NULL);
1624
1625    ureg_destroy(ureg);
1626
1627    st_translate_stream_output_info(prog);
1628
1629    st_store_ir_in_disk_cache(st, prog, false);
1630
1631    if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1632       _mesa_print_program(prog);
1633
1634    free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1635    stp->glsl_to_tgsi = NULL;
1636    return true;
1637 }
1638
1639
1640 /**
1641  * Get/create a basic program variant.
1642  */
1643 struct st_variant *
1644 st_get_common_variant(struct st_context *st,
1645                       struct st_program *prog,
1646                       const struct st_common_variant_key *key)
1647 {
1648    struct pipe_context *pipe = st->pipe;
1649    struct st_variant *v;
1650    struct pipe_shader_state state = {0};
1651
1652    /* Search for existing variant */
1653    for (v = prog->variants; v; v = v->next) {
1654       if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
1655          break;
1656    }
1657
1658    if (!v) {
1659       /* create new */
1660       v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
1661       if (v) {
1662          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1663             bool finalize = false;
1664
1665             state.type = PIPE_SHADER_IR_NIR;
1666             state.ir.nir = nir_shader_clone(NULL, prog->Base.nir);
1667
1668             if (key->clamp_color) {
1669                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1670                finalize = true;
1671             }
1672
1673             state.stream_output = prog->state.stream_output;
1674
1675             if (finalize || !st->allow_st_finalize_nir_twice) {
1676                st_finalize_nir(st, &prog->Base, prog->shader_program,
1677                                state.ir.nir, true);
1678             }
1679
1680             if (ST_DEBUG & DEBUG_PRINT_IR)
1681                nir_print_shader(state.ir.nir, stderr);
1682          } else {
1683             if (key->lower_depth_clamp) {
1684                struct gl_program_parameter_list *params = prog->Base.Parameters;
1685
1686                unsigned depth_range_const =
1687                      _mesa_add_state_reference(params, depth_range_state);
1688
1689                const struct tgsi_token *tokens;
1690                tokens =
1691                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1692                                                depth_range_const,
1693                                                key->clip_negative_one_to_one);
1694
1695                if (tokens != prog->state.tokens)
1696                   tgsi_free_tokens(prog->state.tokens);
1697
1698                prog->state.tokens = tokens;
1699             }
1700             state = prog->state;
1701
1702             if (ST_DEBUG & DEBUG_PRINT_IR)
1703                tgsi_dump(state.tokens, 0);
1704          }
1705          /* fill in new variant */
1706          switch (prog->Base.info.stage) {
1707          case MESA_SHADER_TESS_CTRL:
1708             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1709             break;
1710          case MESA_SHADER_TESS_EVAL:
1711             v->driver_shader = pipe->create_tes_state(pipe, &state);
1712             break;
1713          case MESA_SHADER_GEOMETRY:
1714             v->driver_shader = pipe->create_gs_state(pipe, &state);
1715             break;
1716          case MESA_SHADER_COMPUTE: {
1717             struct pipe_compute_state cs = {0};
1718             cs.ir_type = state.type;
1719             cs.req_local_mem = prog->Base.info.cs.shared_size;
1720
1721             if (state.type == PIPE_SHADER_IR_NIR)
1722                cs.prog = state.ir.nir;
1723             else
1724                cs.prog = state.tokens;
1725
1726             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1727             break;
1728          }
1729          default:
1730             assert(!"unhandled shader type");
1731             free(v);
1732             return NULL;
1733          }
1734
1735          st_common_variant(v)->key = *key;
1736          v->st = key->st;
1737
1738          /* insert into list */
1739          v->next = prog->variants;
1740          prog->variants = v;
1741       }
1742    }
1743
1744    return v;
1745 }
1746
1747
1748 /**
1749  * Vert/Geom/Frag programs have per-context variants.  Free all the
1750  * variants attached to the given program which match the given context.
1751  */
1752 static void
1753 destroy_program_variants(struct st_context *st, struct gl_program *target)
1754 {
1755    if (!target || target == &_mesa_DummyProgram)
1756       return;
1757
1758    struct st_program *p = st_program(target);
1759    struct st_variant *v, **prevPtr = &p->variants;
1760
1761    for (v = p->variants; v; ) {
1762       struct st_variant *next = v->next;
1763       if (v->st == st) {
1764          /* unlink from list */
1765          *prevPtr = next;
1766          /* destroy this variant */
1767          delete_variant(st, v, target->Target);
1768       }
1769       else {
1770          prevPtr = &v->next;
1771       }
1772       v = next;
1773    }
1774 }
1775
1776
1777 /**
1778  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1779  * which match the given context.
1780  */
1781 static void
1782 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1783 {
1784    struct st_context *st = (struct st_context *) userData;
1785    struct gl_shader *shader = (struct gl_shader *) data;
1786
1787    switch (shader->Type) {
1788    case GL_SHADER_PROGRAM_MESA:
1789       {
1790          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1791          GLuint i;
1792
1793          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1794             if (shProg->_LinkedShaders[i])
1795                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1796          }
1797       }
1798       break;
1799    case GL_VERTEX_SHADER:
1800    case GL_FRAGMENT_SHADER:
1801    case GL_GEOMETRY_SHADER:
1802    case GL_TESS_CONTROL_SHADER:
1803    case GL_TESS_EVALUATION_SHADER:
1804    case GL_COMPUTE_SHADER:
1805       break;
1806    default:
1807       assert(0);
1808    }
1809 }
1810
1811
1812 /**
1813  * Callback for _mesa_HashWalk.  Free all the program variants which match
1814  * the given context.
1815  */
1816 static void
1817 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1818 {
1819    struct st_context *st = (struct st_context *) userData;
1820    struct gl_program *program = (struct gl_program *) data;
1821    destroy_program_variants(st, program);
1822 }
1823
1824
1825 /**
1826  * Walk over all shaders and programs to delete any variants which
1827  * belong to the given context.
1828  * This is called during context tear-down.
1829  */
1830 void
1831 st_destroy_program_variants(struct st_context *st)
1832 {
1833    /* If shaders can be shared with other contexts, the last context will
1834     * call DeleteProgram on all shaders, releasing everything.
1835     */
1836    if (st->has_shareable_shaders)
1837       return;
1838
1839    /* ARB vert/frag program */
1840    _mesa_HashWalk(st->ctx->Shared->Programs,
1841                   destroy_program_variants_cb, st);
1842
1843    /* GLSL vert/frag/geom shaders */
1844    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1845                   destroy_shader_program_variants_cb, st);
1846 }
1847
1848
1849 /**
1850  * Compile one shader variant.
1851  */
1852 static void
1853 st_precompile_shader_variant(struct st_context *st,
1854                              struct gl_program *prog)
1855 {
1856    switch (prog->Target) {
1857    case GL_VERTEX_PROGRAM_ARB: {
1858       struct st_program *p = (struct st_program *)prog;
1859       struct st_common_variant_key key;
1860
1861       memset(&key, 0, sizeof(key));
1862
1863       key.st = st->has_shareable_shaders ? NULL : st;
1864       st_get_vp_variant(st, p, &key);
1865       break;
1866    }
1867
1868    case GL_FRAGMENT_PROGRAM_ARB: {
1869       struct st_program *p = (struct st_program *)prog;
1870       struct st_fp_variant_key key;
1871
1872       memset(&key, 0, sizeof(key));
1873
1874       key.st = st->has_shareable_shaders ? NULL : st;
1875       st_get_fp_variant(st, p, &key);
1876       break;
1877    }
1878
1879    case GL_TESS_CONTROL_PROGRAM_NV:
1880    case GL_TESS_EVALUATION_PROGRAM_NV:
1881    case GL_GEOMETRY_PROGRAM_NV:
1882    case GL_COMPUTE_PROGRAM_NV: {
1883       struct st_program *p = st_program(prog);
1884       struct st_common_variant_key key;
1885
1886       memset(&key, 0, sizeof(key));
1887
1888       key.st = st->has_shareable_shaders ? NULL : st;
1889       st_get_common_variant(st, p, &key);
1890       break;
1891    }
1892
1893    default:
1894       assert(0);
1895    }
1896 }
1897
1898 void
1899 st_finalize_program(struct st_context *st, struct gl_program *prog)
1900 {
1901    if (st->current_program[prog->info.stage] == prog) {
1902       if (prog->info.stage == MESA_SHADER_VERTEX)
1903          st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
1904       else
1905          st->dirty |= ((struct st_program *)prog)->affected_states;
1906    }
1907
1908    if (prog->nir)
1909       nir_sweep(prog->nir);
1910
1911    /* Create Gallium shaders now instead of on demand. */
1912    if (ST_DEBUG & DEBUG_PRECOMPILE ||
1913        st->shader_has_one_variant[prog->info.stage])
1914       st_precompile_shader_variant(st, prog);
1915 }