/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 *   Brian Paul
 */


#include "main/errors.h"
#include "main/imports.h"
#include "main/hash.h"
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/programopt.h"

#include "compiler/nir/nir.h"
#include "draw/draw_context.h"

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_emulate.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"

#include "st_debug.h"
#include "st_cb_bitmap.h"
#include "st_cb_drawpixels.h"
#include "st_context.h"
#include "st_tgsi_lower_depth_clamp.h"
#include "st_tgsi_lower_yuv.h"
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
#include "st_atifs_to_tgsi.h"
#include "st_nir.h"
#include "st_shader_cache.h"
#include "st_util.h"
#include "cso_cache/cso_context.h"


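/**
 * OR into *states the state-change flags that binding this program must
 * trigger, based on which resource types the program actually uses.  The
 * caller passes the per-stage flag values (constants, sampler views,
 * samplers, images, UBOs, SSBOs, atomics).
 */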
static void
set_affected_state_flags(uint64_t *states,
                         struct gl_program *prog,
                         uint64_t new_constants,
                         uint64_t new_sampler_views,
                         uint64_t new_samplers,
                         uint64_t new_images,
                         uint64_t new_ubos,
                         uint64_t new_ssbos,
                         uint64_t new_atomics)
{
   if (prog->Parameters->NumParameters)
      *states |= new_constants;

   if (prog->info.num_textures)
      *states |= new_sampler_views | new_samplers;

   if (prog->info.num_images)
      *states |= new_images;

   if (prog->info.num_ubos)
      *states |= new_ubos;

   if (prog->info.num_ssbos)
      *states |= new_ssbos;

   if (prog->info.num_abos)
      *states |= new_atomics;
}

/**
 * This determines which states will be updated when the shader is bound.
 */
void
st_set_prog_affected_state_flags(struct gl_program *prog)
{
   uint64_t *states;

   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      states = &((struct st_program*)prog)->affected_states;

      *states = ST_NEW_VS_STATE |
                ST_NEW_RASTERIZER |
                ST_NEW_VERTEX_ARRAYS;

      set_affected_state_flags(states, prog,
                               ST_NEW_VS_CONSTANTS,
                               ST_NEW_VS_SAMPLER_VIEWS,
                               ST_NEW_VS_SAMPLERS,
                               ST_NEW_VS_IMAGES,
                               ST_NEW_VS_UBOS,
                               ST_NEW_VS_SSBOS,
                               ST_NEW_VS_ATOMICS);
      break;

   case MESA_SHADER_TESS_CTRL:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_TCS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_TCS_CONSTANTS,
                               ST_NEW_TCS_SAMPLER_VIEWS,
                               ST_NEW_TCS_SAMPLERS,
                               ST_NEW_TCS_IMAGES,
                               ST_NEW_TCS_UBOS,
                               ST_NEW_TCS_SSBOS,
                               ST_NEW_TCS_ATOMICS);
      break;

   case MESA_SHADER_TESS_EVAL:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_TES_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_TES_CONSTANTS,
                               ST_NEW_TES_SAMPLER_VIEWS,
                               ST_NEW_TES_SAMPLERS,
                               ST_NEW_TES_IMAGES,
                               ST_NEW_TES_UBOS,
                               ST_NEW_TES_SSBOS,
                               ST_NEW_TES_ATOMICS);
      break;

   case MESA_SHADER_GEOMETRY:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_GS_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_GS_CONSTANTS,
                               ST_NEW_GS_SAMPLER_VIEWS,
                               ST_NEW_GS_SAMPLERS,
                               ST_NEW_GS_IMAGES,
                               ST_NEW_GS_UBOS,
                               ST_NEW_GS_SSBOS,
                               ST_NEW_GS_ATOMICS);
      break;

   case MESA_SHADER_FRAGMENT:
      states = &((struct st_program*)prog)->affected_states;

      /* gl_FragCoord and glDrawPixels always use constants. */
      *states = ST_NEW_FS_STATE |
                ST_NEW_SAMPLE_SHADING |
                ST_NEW_FS_CONSTANTS;

      set_affected_state_flags(states, prog,
                               ST_NEW_FS_CONSTANTS,
                               ST_NEW_FS_SAMPLER_VIEWS,
                               ST_NEW_FS_SAMPLERS,
                               ST_NEW_FS_IMAGES,
                               ST_NEW_FS_UBOS,
                               ST_NEW_FS_SSBOS,
                               ST_NEW_FS_ATOMICS);
      break;

   case MESA_SHADER_COMPUTE:
      states = &((struct st_program*)prog)->affected_states;

      *states = ST_NEW_CS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_CS_CONSTANTS,
                               ST_NEW_CS_SAMPLER_VIEWS,
                               ST_NEW_CS_SAMPLERS,
                               ST_NEW_CS_IMAGES,
                               ST_NEW_CS_UBOS,
                               ST_NEW_CS_SSBOS,
                               ST_NEW_CS_ATOMICS);
      break;

   default:
      unreachable("unhandled shader stage");
   }
}
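
/*
 * These flags are consumed when state is validated: for example,
 * st_finalize_program() below ORs prog->affected_states into st->dirty
 * when the finalized program is currently bound, so exactly the state
 * the program can read gets revalidated.
 */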


/**
 * Delete a shader variant.  Note the caller must unlink the variant from
 * the linked list.
 */
static void
delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
{
   if (v->driver_shader) {
      if (target == GL_VERTEX_PROGRAM_ARB &&
          ((struct st_common_variant*)v)->key.is_draw_shader) {
         /* Draw shader. */
         draw_delete_vertex_shader(st->draw, v->driver_shader);
      } else if (st->has_shareable_shaders || v->st == st) {
         /* The shader's context matches the calling context, or we
          * don't care.
          */
         switch (target) {
         case GL_VERTEX_PROGRAM_ARB:
            cso_delete_vertex_shader(st->cso_context, v->driver_shader);
            break;
         case GL_TESS_CONTROL_PROGRAM_NV:
            cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
            break;
         case GL_TESS_EVALUATION_PROGRAM_NV:
            cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
            break;
         case GL_GEOMETRY_PROGRAM_NV:
            cso_delete_geometry_shader(st->cso_context, v->driver_shader);
            break;
         case GL_FRAGMENT_PROGRAM_ARB:
            cso_delete_fragment_shader(st->cso_context, v->driver_shader);
            break;
         case GL_COMPUTE_PROGRAM_NV:
            cso_delete_compute_shader(st->cso_context, v->driver_shader);
            break;
         default:
            unreachable("bad shader type in delete_variant");
         }
      } else {
         /* We can't delete a shader with a context different from the one
          * that created it.  Add it to the creating context's zombie list.
          */
         enum pipe_shader_type type =
            pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));

         st_save_zombie_shader(v->st, type, v->driver_shader);
      }
   }

   free(v);
}


/**
 * Free all basic program variants.
 */
void
st_release_variants(struct st_context *st, struct st_program *p)
{
   struct st_variant *v;

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      delete_variant(st, v, p->Base.Target);
      v = next;
   }

   p->variants = NULL;

   if (p->state.tokens) {
      ureg_free_tokens(p->state.tokens);
      p->state.tokens = NULL;
   }

   /* Note: if a pipe_shader_state's ir.nir was passed to
    * pipe->create_*_state, the driver took ownership of that NIR.  Such
    * callers are expected to NULL out the nir field afterwards to record
    * the transfer, which is why no NIR is freed here.
    *
    * GLSL IR and ARB programs will have set gl_program->nir to the same
    * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
    */
}

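/**
 * Run the NIR lowerings that must happen once per program, before any
 * per-variant lowering is applied.
 */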
void
st_finalize_nir_before_variants(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_opt_access);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   if (nir->options->lower_all_io_to_temps ||
       nir->options->lower_all_io_to_elements ||
       nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   st_nir_assign_vs_in_locations(nir);
}

/**
 * Translate an ARB (asm) program to NIR.
 */
static nir_shader *
st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
                         gl_shader_stage stage)
{
   struct pipe_screen *screen = st->pipe->screen;
   const struct gl_shader_compiler_options *options =
      &st->ctx->Const.ShaderCompilerOptions[stage];

   /* Translate to NIR */
   nir_shader *nir = prog_to_nir(prog, options->NirOptions);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
   nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");

   NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS_V(nir, nir_lower_system_values);

   /* Optimize NIR */
   NIR_PASS_V(nir, nir_opt_constant_folding);
   st_nir_opts(nir);
   st_finalize_nir_before_variants(nir);

   if (st->allow_st_finalize_nir_twice)
      st_finalize_nir(st, prog, NULL, nir, true);

   nir_validate_shader(nir, "after st/glsl finalize_nir");

   return nir;
}

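/**
 * Build the mappings between VERT_ATTRIB_x inputs / VARYING_SLOT_x
 * outputs and packed slot indices, including placeholder slots for
 * dual-slot (double) attributes and the potentially unused edgeflag
 * input/output.
 */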
void
st_prepare_vertex_program(struct st_program *stp)
{
   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

   stvp->num_inputs = 0;
   memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
   memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));

   /* Determine number of inputs, the mappings between VERT_ATTRIB_x
    * and TGSI generic input indexes, plus input attrib semantic info.
    */
   for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
         stvp->input_to_index[attr] = stvp->num_inputs;
         stvp->index_to_input[stvp->num_inputs] = attr;
         stvp->num_inputs++;

         if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
            /* add placeholder for second part of a double attribute */
            stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
            stvp->num_inputs++;
         }
      }
   }
   /* pre-setup potentially unused edgeflag input */
   stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
   stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;

   /* Compute mapping of vertex program outputs to slots. */
   unsigned num_outputs = 0;
   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
         stvp->result_to_output[attr] = num_outputs++;
   }
   /* pre-setup potentially unused edgeflag output */
   stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
}

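/**
 * Translate the program's gl_transform_feedback_info into the
 * pipe_stream_output_info consumed by Gallium drivers, remapping
 * VARYING_SLOT_x output registers to packed slot indices.
 */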
void
st_translate_stream_output_info(struct gl_program *prog)
{
   struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
   if (!info)
      return;

   /* Determine the (default) output register mapping for each output. */
   unsigned num_outputs = 0;
   ubyte output_mapping[VARYING_SLOT_TESS_MAX];
   memset(output_mapping, 0, sizeof(output_mapping));

   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
         output_mapping[attr] = num_outputs++;
   }

   /* Translate stream output info. */
   struct pipe_stream_output_info *so_info =
      &((struct st_program*)prog)->state.stream_output;

   for (unsigned i = 0; i < info->NumOutputs; i++) {
      so_info->output[i].register_index =
         output_mapping[info->Outputs[i].OutputRegister];
      so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
      so_info->output[i].num_components = info->Outputs[i].NumComponents;
      so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
      so_info->output[i].stream = info->Outputs[i].StreamId;
   }

   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so_info->stride[i] = info->Buffers[i].Stride;
   }
   so_info->num_outputs = info->NumOutputs;
}

/**
 * Translate a vertex program.
 */
bool
st_translate_vertex_program(struct st_context *st,
                            struct st_program *stp)
{
   struct ureg_program *ureg;
   enum pipe_error error;
   unsigned num_outputs = 0;
   unsigned attr;
   ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
   ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};

   if (stp->Base.arb.IsPositionInvariant)
      _mesa_insert_mvp_code(st->ctx, &stp->Base);

   st_prepare_vertex_program(stp);

   /* ARB_vp: */
   if (!stp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       */
      stp->affected_states = ST_NEW_VS_STATE |
                             ST_NEW_RASTERIZER |
                             ST_NEW_VERTEX_ARRAYS;

      if (stp->Base.Parameters->NumParameters)
         stp->affected_states |= ST_NEW_VS_CONSTANTS;

      /* Translate to NIR if preferred. */
      if (st->pipe->screen->get_shader_param(st->pipe->screen,
                                             PIPE_SHADER_VERTEX,
                                             PIPE_SHADER_CAP_PREFERRED_IR)) {
         assert(!stp->glsl_to_tgsi);

         if (stp->Base.nir)
            ralloc_free(stp->Base.nir);

         stp->state.type = PIPE_SHADER_IR_NIR;
         stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
                                                  MESA_SHADER_VERTEX);
         /* For st_draw_feedback, we need to generate TGSI too if draw doesn't
          * use LLVM.
          */
         if (draw_has_llvm())
            return true;
      }
   }

   /* Get semantic names and indices. */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
         unsigned slot = num_outputs++;
         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }
   /* pre-setup potentially unused edgeflag output */
   output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
   output_semantic_index[num_outputs] = 0;

   ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
   if (ureg == NULL)
      return false;

   if (stp->Base.info.clip_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
                    stp->Base.info.clip_distance_array_size);
   if (stp->Base.info.cull_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
                    stp->Base.info.cull_distance_array_size);

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stp->Base);
      _mesa_print_program_parameters(st->ctx, &stp->Base);
      debug_printf("\n");
   }

   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

   if (stp->glsl_to_tgsi) {
      error = st_translate_program(st->ctx,
                                   PIPE_SHADER_VERTEX,
                                   ureg,
                                   stp->glsl_to_tgsi,
                                   &stp->Base,
                                   /* inputs */
                                   stvp->num_inputs,
                                   stvp->input_to_index,
                                   NULL, /* inputSlotToAttr */
                                   NULL, /* input semantic name */
                                   NULL, /* input semantic index */
                                   NULL, /* interp mode */
                                   /* outputs */
                                   num_outputs,
                                   stvp->result_to_output,
                                   output_semantic_name,
                                   output_semantic_index);

      st_translate_stream_output_info(&stp->Base);

      free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   } else
      error = st_translate_mesa_program(st->ctx,
                                        PIPE_SHADER_VERTEX,
                                        ureg,
                                        &stp->Base,
                                        /* inputs */
                                        stvp->num_inputs,
                                        stvp->input_to_index,
                                        NULL, /* input semantic name */
                                        NULL, /* input semantic index */
                                        NULL,
                                        /* outputs */
                                        num_outputs,
                                        stvp->result_to_output,
                                        output_semantic_name,
                                        output_semantic_index);

   if (error) {
      debug_printf("%s: failed to translate Mesa program:\n", __func__);
      _mesa_print_program(&stp->Base);
      debug_assert(0);
      return false;
   }

   stp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   if (stp->glsl_to_tgsi) {
      stp->glsl_to_tgsi = NULL;
      st_store_ir_in_disk_cache(st, &stp->Base, false);
   }

   return stp->state.tokens != NULL;
}

static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };

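/**
 * Create a new vertex-program variant for the given key: clone the
 * program's NIR (or duplicate its TGSI tokens) and apply the key-driven
 * lowerings (color clamping, edgeflag passthrough, point size, user
 * clip planes, depth clamp).
 */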
static struct st_common_variant *
st_create_vp_variant(struct st_context *st,
                     struct st_program *stvp,
                     const struct st_common_variant_key *key)
{
   struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = pipe->screen;
   struct pipe_shader_state state = {0};

   static const gl_state_index16 point_size_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
   struct gl_program_parameter_list *params = stvp->Base.Parameters;

   vpv->key = *key;

   state.stream_output = stvp->state.stream_output;

   if (stvp->state.type == PIPE_SHADER_IR_NIR &&
       (!key->is_draw_shader || draw_has_llvm())) {
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = nir_shader_clone(NULL, stvp->Base.nir);
      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }
      if (key->passthrough_edgeflags) {
         NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
         finalize = true;
      }

      if (key->lower_point_size) {
         _mesa_add_state_reference(params, point_size_state);
         NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
                    point_size_state);
         finalize = true;
      }

      if (key->lower_ucp) {
         bool can_compact = screen->get_param(screen,
                                              PIPE_CAP_NIR_COMPACT_ARRAYS);

         bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
         gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
         for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
            if (use_eye) {
               clipplane_state[i][0] = STATE_CLIPPLANE;
               clipplane_state[i][1] = i;
            } else {
               clipplane_state[i][0] = STATE_INTERNAL;
               clipplane_state[i][1] = STATE_CLIP_INTERNAL;
               clipplane_state[i][2] = i;
            }
            _mesa_add_state_reference(params, clipplane_state[i]);
         }

         NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
                    true, can_compact, clipplane_state);
         NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
                    nir_shader_get_entrypoint(state.ir.nir), true, false);
         NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
                         true);

         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));
      }

      if (ST_DEBUG & DEBUG_PRINT_IR)
         nir_print_shader(state.ir.nir, stderr);

      if (key->is_draw_shader)
         vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
      else
         vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);

      return vpv;
   }

   state.type = PIPE_SHADER_IR_TGSI;
   state.tokens = tgsi_dup_tokens(stvp->state.tokens);

   /* Emulate features. */
   if (key->clamp_color || key->passthrough_edgeflags) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const =
         _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
                                         key->clip_negative_one_to_one);
      if (tokens != state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   if (key->is_draw_shader)
      vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
   else
      vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);

   if (state.tokens) {
      tgsi_free_tokens(state.tokens);
   }

   return vpv;
}


/**
 * Find/create a vertex program variant.
 */
struct st_common_variant *
st_get_vp_variant(struct st_context *st,
                  struct st_program *stp,
                  const struct st_common_variant_key *key)
{
   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
   struct st_common_variant *vpv;

   /* Search for existing variant */
   for (vpv = st_common_variant(stp->variants); vpv;
        vpv = st_common_variant(vpv->base.next)) {
      if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!vpv) {
      /* create now */
      vpv = st_create_vp_variant(st, stp, key);
      if (vpv) {
         vpv->base.st = key->st;

         unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
         for (unsigned index = 0; index < num_inputs; ++index) {
            unsigned attr = stvp->index_to_input[index];
            if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
               continue;
            vpv->vert_attrib_mask |= 1u << attr;
         }

         /* insert into list */
         vpv->base.next = stp->variants;
         stp->variants = &vpv->base;
      }
   }

   return vpv;
}
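
/*
 * Note: vert_attrib_mask above records which VERT_ATTRIB_x slots the
 * variant actually reads; the vertex-array state update uses it to set
 * up only the vertex elements the shader consumes.
 */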


/**
 * Translate a Mesa fragment shader into a TGSI shader.
 */
bool
st_translate_fragment_program(struct st_context *st,
                              struct st_program *stfp)
{
   /* Non-GLSL programs: */
   if (!stfp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
      if (st->ctx->Const.GLSLFragCoordIsSysVal)
         _mesa_program_fragment_position_to_sysval(&stfp->Base);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       *
       * fragment.position and glDrawPixels always use constants.
       */
      stfp->affected_states = ST_NEW_FS_STATE |
                              ST_NEW_SAMPLE_SHADING |
                              ST_NEW_FS_CONSTANTS;

      if (stfp->ati_fs) {
         /* Just set them for ATI_fs unconditionally. */
         stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                  ST_NEW_FS_SAMPLERS;
      } else {
         /* ARB_fp */
         if (stfp->Base.SamplersUsed)
            stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                     ST_NEW_FS_SAMPLERS;
      }

      /* Translate to NIR. */
      if (!stfp->ati_fs &&
          st->pipe->screen->get_shader_param(st->pipe->screen,
                                             PIPE_SHADER_FRAGMENT,
                                             PIPE_SHADER_CAP_PREFERRED_IR)) {
         nir_shader *nir =
            st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);

         if (stfp->Base.nir)
            ralloc_free(stfp->Base.nir);
         stfp->state.type = PIPE_SHADER_IR_NIR;
         stfp->Base.nir = nir;
         return true;
      }
   }

   ubyte outputMapping[2 * FRAG_RESULT_MAX];
   ubyte inputMapping[VARYING_SLOT_MAX];
   ubyte inputSlotToAttr[VARYING_SLOT_MAX];
   ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
   GLuint attr;
   GLbitfield64 inputsRead;
   struct ureg_program *ureg;

   GLboolean write_all = GL_FALSE;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint fs_num_inputs = 0;

   ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint fs_num_outputs = 0;

   memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   inputsRead = stfp->Base.info.inputs_read;
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
         const GLuint slot = fs_num_inputs++;

         inputMapping[attr] = slot;
         inputSlotToAttr[slot] = attr;

         switch (attr) {
         case VARYING_SLOT_POS:
            input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            break;
         case VARYING_SLOT_COL0:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 0;
            interpMode[slot] = stfp->glsl_to_tgsi ?
               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
            break;
         case VARYING_SLOT_COL1:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 1;
            interpMode[slot] = stfp->glsl_to_tgsi ?
               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
            break;
         case VARYING_SLOT_FOGC:
            input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_FACE:
            input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_PRIMITIVE_ID:
            input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_LAYER:
            input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_VIEWPORT:
            input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_CLIP_DIST0:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CLIP_DIST1:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 1;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CULL_DIST0:
         case VARYING_SLOT_CULL_DIST1:
            /* these should have been lowered by GLSL */
            assert(0);
            break;
            /* In most cases, there is nothing special about these
             * inputs, so adopt a convention to use the generic
             * semantic name and the mesa VARYING_SLOT_ number as the
             * index.
             *
             * All that is required is that the vertex shader labels
             * its own outputs similarly, and that the vertex shader
             * generates at least every output required by the
             * fragment shader plus fixed-function hardware (such as
             * BFC).
             *
             * However, some drivers may need us to identify the PNTC and TEXi
             * varyings if, for example, their capability to replace them with
             * sprite coordinates is limited.
             */
         case VARYING_SLOT_PNTC:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
               input_semantic_index[slot] = 0;
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
               break;
            }
            /* fall through */
         case VARYING_SLOT_TEX0:
         case VARYING_SLOT_TEX1:
         case VARYING_SLOT_TEX2:
         case VARYING_SLOT_TEX3:
         case VARYING_SLOT_TEX4:
         case VARYING_SLOT_TEX5:
         case VARYING_SLOT_TEX6:
         case VARYING_SLOT_TEX7:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
               input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
               interpMode[slot] = stfp->glsl_to_tgsi ?
                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
               break;
            }
            /* fall through */
         case VARYING_SLOT_VAR0:
         default:
            /* Semantic indices should be zero-based because drivers may choose
             * to assign a fixed slot determined by that index.
             * This is useful because ARB_separate_shader_objects uses location
             * qualifiers for linkage, and if the semantic index corresponds to
             * these locations, linkage passes in the driver become unnecessary.
             *
             * If needs_texcoord_semantic is true, no semantic indices will be
             * consumed for the TEXi varyings, and we can base the locations of
             * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
             */
            assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
                   (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
            input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
            if (attr == VARYING_SLOT_PNTC)
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            else {
               interpMode[slot] = stfp->glsl_to_tgsi ?
                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
            }
            break;
         }
      }
      else {
         inputMapping[attr] = -1;
      }
   }

   /*
    * Semantics and mapping for outputs
    */
   GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;

   /* if z is written, emit that first */
   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
   }

   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
   }

   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
   }

   /* handle remaining outputs (color) */
   for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
      const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
         stfp->Base.SecondaryOutputsWritten;
      const unsigned loc = attr % FRAG_RESULT_MAX;

      if (written & BITFIELD64_BIT(loc)) {
         switch (loc) {
         case FRAG_RESULT_DEPTH:
         case FRAG_RESULT_STENCIL:
         case FRAG_RESULT_SAMPLE_MASK:
            /* handled above */
            assert(0);
            break;
         case FRAG_RESULT_COLOR:
            write_all = GL_TRUE; /* fallthrough */
         default: {
            int index;
            assert(loc == FRAG_RESULT_COLOR ||
                   (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));

            index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);

            if (attr >= FRAG_RESULT_MAX) {
               /* Secondary color for dual source blending. */
               assert(index == 0);
               index++;
            }

            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
            fs_output_semantic_index[fs_num_outputs] = index;
            outputMapping[attr] = fs_num_outputs;
            break;
         }
         }

         fs_num_outputs++;
      }
   }

   ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
   if (ureg == NULL)
      return false;

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stfp->Base);
      _mesa_print_program_parameters(st->ctx, &stfp->Base);
      debug_printf("\n");
   }
   if (write_all == GL_TRUE)
      ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);

   if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
      switch (stfp->Base.info.fs.depth_layout) {
      case FRAG_DEPTH_LAYOUT_ANY:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_ANY);
         break;
      case FRAG_DEPTH_LAYOUT_GREATER:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_GREATER);
         break;
      case FRAG_DEPTH_LAYOUT_LESS:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_LESS);
         break;
      case FRAG_DEPTH_LAYOUT_UNCHANGED:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
         break;
      default:
         assert(0);
      }
   }

   if (stfp->glsl_to_tgsi) {
      st_translate_program(st->ctx,
                           PIPE_SHADER_FRAGMENT,
                           ureg,
                           stfp->glsl_to_tgsi,
                           &stfp->Base,
                           /* inputs */
                           fs_num_inputs,
                           inputMapping,
                           inputSlotToAttr,
                           input_semantic_name,
                           input_semantic_index,
                           interpMode,
                           /* outputs */
                           fs_num_outputs,
                           outputMapping,
                           fs_output_semantic_name,
                           fs_output_semantic_index);

      free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
   } else if (stfp->ati_fs)
      st_translate_atifs_program(ureg,
                                 stfp->ati_fs,
                                 &stfp->Base,
                                 /* inputs */
                                 fs_num_inputs,
                                 inputMapping,
                                 input_semantic_name,
                                 input_semantic_index,
                                 interpMode,
                                 /* outputs */
                                 fs_num_outputs,
                                 outputMapping,
                                 fs_output_semantic_name,
                                 fs_output_semantic_index);
   else
      st_translate_mesa_program(st->ctx,
                                PIPE_SHADER_FRAGMENT,
                                ureg,
                                &stfp->Base,
                                /* inputs */
                                fs_num_inputs,
                                inputMapping,
                                input_semantic_name,
                                input_semantic_index,
                                interpMode,
                                /* outputs */
                                fs_num_outputs,
                                outputMapping,
                                fs_output_semantic_name,
                                fs_output_semantic_index);

   stfp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   if (stfp->glsl_to_tgsi) {
      stfp->glsl_to_tgsi = NULL;
      st_store_ir_in_disk_cache(st, &stfp->Base, false);
   }

   return stfp->state.tokens != NULL;
}

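/**
 * Create a new fragment-program variant for the given key, applying the
 * key-driven lowerings (color clamping, flat shading, alpha test,
 * glBitmap/glDrawPixels, external YUV samplers, depth clamp) to a clone
 * of the program's NIR or TGSI.
 */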
static struct st_fp_variant *
st_create_fp_variant(struct st_context *st,
                     struct st_program *stfp,
                     const struct st_fp_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = stfp->Base.Parameters;
   static const gl_state_index16 texcoord_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
   static const gl_state_index16 scale_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_PT_SCALE };
   static const gl_state_index16 bias_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_PT_BIAS };
   static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_ALPHA_REF };

   if (!variant)
      return NULL;

   if (stfp->state.type == PIPE_SHADER_IR_NIR) {
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = nir_shader_clone(NULL, stfp->Base.nir);

      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }

      if (key->lower_flatshade) {
         NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
         finalize = true;
      }

      if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
         _mesa_add_state_reference(params, alpha_ref_state);
         NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
                    false, alpha_ref_state);
         finalize = true;
      }

      if (key->lower_two_sided_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
         finalize = true;
      }

      if (key->persample_shading) {
         nir_shader *shader = state.ir.nir;
         nir_foreach_variable(var, &shader->inputs)
            var->data.sample = true;
         finalize = true;
      }

      assert(!(key->bitmap && key->drawpixels));

      /* glBitmap */
      if (key->bitmap) {
         nir_lower_bitmap_options options = {0};

         variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
         options.sampler = variant->bitmap_sampler;
         options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;

         NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
         finalize = true;
      }

      /* glDrawPixels (color only) */
      if (key->drawpixels) {
         nir_lower_drawpixels_options options = {{0}};
         unsigned samplers_used = stfp->Base.SamplersUsed;

         /* Find the first unused slot. */
         variant->drawpix_sampler = ffs(~samplers_used) - 1;
         options.drawpix_sampler = variant->drawpix_sampler;
         samplers_used |= (1 << variant->drawpix_sampler);

         options.pixel_maps = key->pixelMaps;
         if (key->pixelMaps) {
            variant->pixelmap_sampler = ffs(~samplers_used) - 1;
            options.pixelmap_sampler = variant->pixelmap_sampler;
         }

         options.scale_and_bias = key->scaleAndBias;
         if (key->scaleAndBias) {
            _mesa_add_state_reference(params, scale_state);
            memcpy(options.scale_state_tokens, scale_state,
                   sizeof(options.scale_state_tokens));
            _mesa_add_state_reference(params, bias_state);
            memcpy(options.bias_state_tokens, bias_state,
                   sizeof(options.bias_state_tokens));
         }

         _mesa_add_state_reference(params, texcoord_state);
         memcpy(options.texcoord_state_tokens, texcoord_state,
                sizeof(options.texcoord_state_tokens));

         NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
         finalize = true;
      }

      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv)) {

         st_nir_lower_samplers(pipe->screen, state.ir.nir,
                               stfp->shader_program, &stfp->Base);

         nir_lower_tex_options options = {0};
         options.lower_y_uv_external = key->external.lower_nv12;
         options.lower_y_u_v_external = key->external.lower_iyuv;
         options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
         options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
         options.lower_ayuv_external = key->external.lower_ayuv;
         options.lower_xyuv_external = key->external.lower_xyuv;
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
                         false);
      }

      /* This pass needs to happen *after* nir_lower_sampler */
      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv)) {
         NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
                    ~stfp->Base.SamplersUsed,
                    key->external.lower_nv12 || key->external.lower_xy_uxvx ||
                       key->external.lower_yx_xuxv,
                    key->external.lower_iyuv);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));

         struct pipe_screen *screen = pipe->screen;
         if (screen->finalize_nir)
            screen->finalize_nir(screen, state.ir.nir, false);
      }

      if (ST_DEBUG & DEBUG_PRINT_IR)
         nir_print_shader(state.ir.nir, stderr);

      variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
      variant->key = *key;

      return variant;
   }

   state.tokens = stfp->state.tokens;

   assert(!(key->bitmap && key->drawpixels));

   /* Fix texture targets and add fog for ATI_fs */
   if (stfp->ati_fs) {
      const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);

      if (tokens)
         state.tokens = tokens;
      else
         fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
   }

   /* Emulate features. */
   if (key->clamp_color || key->persample_shading) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
   }

   /* glBitmap */
   if (key->bitmap) {
      const struct tgsi_token *tokens;

      variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      tokens = st_get_bitmap_shader(state.tokens,
                                    st->internal_target,
                                    variant->bitmap_sampler,
                                    st->needs_texcoord_semantic,
                                    st->bitmap.tex_format ==
                                    PIPE_FORMAT_R8_UNORM);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
   }

   /* glDrawPixels (color only) */
   if (key->drawpixels) {
      const struct tgsi_token *tokens;
      unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;

      /* Find the first unused slot. */
      variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      if (key->pixelMaps) {
         unsigned samplers_used = stfp->Base.SamplersUsed |
                                  (1 << variant->drawpix_sampler);

         variant->pixelmap_sampler = ffs(~samplers_used) - 1;
      }

      if (key->scaleAndBias) {
         scale_const = _mesa_add_state_reference(params, scale_state);
         bias_const = _mesa_add_state_reference(params, bias_state);
      }

      texcoord_const = _mesa_add_state_reference(params, texcoord_state);

      tokens = st_get_drawpix_shader(state.tokens,
                                     st->needs_texcoord_semantic,
                                     key->scaleAndBias, scale_const,
                                     bias_const, key->pixelMaps,
                                     variant->drawpix_sampler,
                                     variant->pixelmap_sampler,
                                     texcoord_const, st->internal_target);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
   }

   if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
      const struct tgsi_token *tokens;

      /* samplers inserted would conflict, but this should be impossible: */
      assert(!(key->bitmap || key->drawpixels));

      tokens = st_tgsi_lower_yuv(state.tokens,
                                 ~stfp->Base.SamplersUsed,
                                 key->external.lower_nv12 ||
                                 key->external.lower_xy_uxvx ||
                                 key->external.lower_yx_xuxv,
                                 key->external.lower_iyuv);
      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
      if (state.tokens != stfp->state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   /* fill in variant */
   variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
   variant->key = *key;

   if (state.tokens != stfp->state.tokens)
      tgsi_free_tokens(state.tokens);
   return variant;
}

/**
 * Find/create a fragment program variant.
 */
struct st_fp_variant *
st_get_fp_variant(struct st_context *st,
                  struct st_program *stfp,
                  const struct st_fp_variant_key *key)
{
   struct st_fp_variant *fpv;

   /* Search for existing variant */
   for (fpv = st_fp_variant(stfp->variants); fpv;
        fpv = st_fp_variant(fpv->base.next)) {
      if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!fpv) {
      /* create new */
      fpv = st_create_fp_variant(st, stfp, key);
      if (fpv) {
         fpv->base.st = key->st;

         if (key->bitmap || key->drawpixels) {
            /* Regular variants should always come before the bitmap &
             * drawpixels variants (unless there are no regular variants),
             * so that st_update_fp can take a fast path when
             * shader_has_one_variant is set.
             */
            if (!stfp->variants) {
               stfp->variants = &fpv->base;
            } else {
               /* insert into list after the first one */
               fpv->base.next = stfp->variants->next;
               stfp->variants->next = &fpv->base;
            }
         } else {
            /* insert into list */
            fpv->base.next = stfp->variants;
            stfp->variants = &fpv->base;
         }
      }
   }

   return fpv;
}
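
/*
 * A minimal sketch of the caller side (hypothetical caller; the key
 * setup mirrors what st_precompile_shader_variant() below does):
 *
 *    struct st_fp_variant_key key;
 *    memset(&key, 0, sizeof(key));
 *    key.st = st->has_shareable_shaders ? NULL : st;
 *    struct st_fp_variant *fpv = st_get_fp_variant(st, stfp, &key);
 *    // fpv->base.driver_shader is then bound through the cso context.
 */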

/**
 * Translate a program.  This is common code for geometry and tessellation
 * shaders.
 */
bool
st_translate_common_program(struct st_context *st,
                            struct st_program *stp)
{
   struct gl_program *prog = &stp->Base;
   enum pipe_shader_type stage =
      pipe_shader_type_from_mesa(stp->Base.info.stage);
   struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);

   if (ureg == NULL)
      return false;

   switch (stage) {
   case PIPE_SHADER_TESS_CTRL:
      ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
                    stp->Base.info.tess.tcs_vertices_out);
      break;

   case PIPE_SHADER_TESS_EVAL:
      if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
         ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
      else
         ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
                       stp->Base.info.tess.primitive_mode);

      STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
      STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
                    PIPE_TESS_SPACING_FRACTIONAL_ODD);
      STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
                    PIPE_TESS_SPACING_FRACTIONAL_EVEN);

      ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
                    (stp->Base.info.tess.spacing + 1) % 3);

      ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
                    !stp->Base.info.tess.ccw);
      ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
                    stp->Base.info.tess.point_mode);
      break;

   case PIPE_SHADER_GEOMETRY:
      ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
                    stp->Base.info.gs.input_primitive);
      ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
                    stp->Base.info.gs.output_primitive);
      ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
                    stp->Base.info.gs.vertices_out);
      ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
                    stp->Base.info.gs.invocations);
      break;

   default:
      break;
   }

   ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
   ubyte inputMapping[VARYING_SLOT_TESS_MAX];
   ubyte outputMapping[VARYING_SLOT_TESS_MAX];
   GLuint attr;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint num_inputs = 0;

   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint num_outputs = 0;

   GLint i;

   memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
   memset(inputMapping, 0, sizeof(inputMapping));
   memset(outputMapping, 0, sizeof(outputMapping));
   memset(&stp->state, 0, sizeof(stp->state));

   if (prog->info.clip_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
                    prog->info.clip_distance_array_size);
   if (prog->info.cull_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
                    prog->info.cull_distance_array_size);

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
         continue;

      unsigned slot = num_inputs++;

      inputMapping[attr] = slot;
      inputSlotToAttr[slot] = attr;

      unsigned semantic_name, semantic_index;
      tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                   &semantic_name, &semantic_index);
      input_semantic_name[slot] = semantic_name;
      input_semantic_index[slot] = semantic_index;
   }

   /* Also add patch inputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_inputs_read & (1u << attr)) {
         GLuint slot = num_inputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         inputMapping[patch_attr] = slot;
         inputSlotToAttr[slot] = patch_attr;
         input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         input_semantic_index[slot] = attr;
      }
   }

   /* initialize output semantics to defaults */
   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
      output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
      output_semantic_index[i] = 0;
   }

   /*
    * Determine number of outputs, the (default) output register
    * mapping and the semantic information for each output.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
         GLuint slot = num_outputs++;

         outputMapping[attr] = slot;

         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }

   /* Also add patch outputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_outputs_written & (1u << attr)) {
         GLuint slot = num_outputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         outputMapping[patch_attr] = slot;
         output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         output_semantic_index[slot] = attr;
      }
   }

   st_translate_program(st->ctx,
                        stage,
                        ureg,
                        stp->glsl_to_tgsi,
                        prog,
                        /* inputs */
                        num_inputs,
                        inputMapping,
                        inputSlotToAttr,
                        input_semantic_name,
                        input_semantic_index,
                        NULL,
                        /* outputs */
                        num_outputs,
                        outputMapping,
                        output_semantic_name,
                        output_semantic_index);

   stp->state.tokens = ureg_get_tokens(ureg, NULL);

   ureg_destroy(ureg);

   st_translate_stream_output_info(prog);

   st_store_ir_in_disk_cache(st, prog, false);

   if ((ST_DEBUG & DEBUG_PRINT_IR) && (ST_DEBUG & DEBUG_MESA))
      _mesa_print_program(prog);

   free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   stp->glsl_to_tgsi = NULL;
   return true;
}


/**
 * Get/create a basic program variant.
 */
struct st_variant *
st_get_common_variant(struct st_context *st,
                      struct st_program *prog,
                      const struct st_common_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_variant *v;
   struct pipe_shader_state state = {0};

   /* Search for existing variant */
   for (v = prog->variants; v; v = v->next) {
      if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
         break;
   }

   if (!v) {
      /* create new */
      v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
      if (v) {
         if (prog->state.type == PIPE_SHADER_IR_NIR) {
            bool finalize = false;

            state.type = PIPE_SHADER_IR_NIR;
            state.ir.nir = nir_shader_clone(NULL, prog->Base.nir);

            if (key->clamp_color) {
               NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
               finalize = true;
            }

            state.stream_output = prog->state.stream_output;

            if (finalize || !st->allow_st_finalize_nir_twice) {
               st_finalize_nir(st, &prog->Base, prog->shader_program,
                               state.ir.nir, true);
            }

            if (ST_DEBUG & DEBUG_PRINT_IR)
               nir_print_shader(state.ir.nir, stderr);
         } else {
            if (key->lower_depth_clamp) {
               struct gl_program_parameter_list *params = prog->Base.Parameters;

               unsigned depth_range_const =
                  _mesa_add_state_reference(params, depth_range_state);

               const struct tgsi_token *tokens;
               tokens =
                  st_tgsi_lower_depth_clamp(prog->state.tokens,
                                            depth_range_const,
                                            key->clip_negative_one_to_one);

               if (tokens != prog->state.tokens)
                  tgsi_free_tokens(prog->state.tokens);

               prog->state.tokens = tokens;
            }
            state = prog->state;

            if (ST_DEBUG & DEBUG_PRINT_IR)
               tgsi_dump(state.tokens, 0);
         }
         /* fill in new variant */
         switch (prog->Base.info.stage) {
         case MESA_SHADER_TESS_CTRL:
            v->driver_shader = pipe->create_tcs_state(pipe, &state);
            break;
         case MESA_SHADER_TESS_EVAL:
            v->driver_shader = pipe->create_tes_state(pipe, &state);
            break;
         case MESA_SHADER_GEOMETRY:
            v->driver_shader = pipe->create_gs_state(pipe, &state);
            break;
         case MESA_SHADER_COMPUTE: {
            struct pipe_compute_state cs = {0};
            cs.ir_type = state.type;
            cs.req_local_mem = prog->Base.info.cs.shared_size;

            if (state.type == PIPE_SHADER_IR_NIR)
               cs.prog = state.ir.nir;
            else
               cs.prog = state.tokens;

            v->driver_shader = pipe->create_compute_state(pipe, &cs);
            break;
         }
         default:
            assert(!"unhandled shader type");
            free(v);
            return NULL;
         }

         st_common_variant(v)->key = *key;
         v->st = key->st;

         /* insert into list */
         v->next = prog->variants;
         prog->variants = v;
      }
   }

   return v;
}


/**
 * Vert/Geom/Frag programs have per-context variants.  Free all the
 * variants attached to the given program which match the given context.
 */
static void
destroy_program_variants(struct st_context *st, struct gl_program *target)
{
   if (!target || target == &_mesa_DummyProgram)
      return;

   struct st_program *p = st_program(target);
   struct st_variant *v, **prevPtr = &p->variants;

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      if (v->st == st) {
         /* unlink from list */
         *prevPtr = next;
         /* destroy this variant */
         delete_variant(st, v, target->Target);
      }
      else {
         prevPtr = &v->next;
      }
      v = next;
   }
}


/**
 * Callback for _mesa_HashWalk.  Free all the shader's program variants
 * which match the given context.
 */
static void
destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_shader *shader = (struct gl_shader *) data;

   switch (shader->Type) {
   case GL_SHADER_PROGRAM_MESA:
      {
         struct gl_shader_program *shProg = (struct gl_shader_program *) data;
         GLuint i;

         for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
            if (shProg->_LinkedShaders[i])
               destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
         }
      }
      break;
   case GL_VERTEX_SHADER:
   case GL_FRAGMENT_SHADER:
   case GL_GEOMETRY_SHADER:
   case GL_TESS_CONTROL_SHADER:
   case GL_TESS_EVALUATION_SHADER:
   case GL_COMPUTE_SHADER:
      break;
   default:
      assert(0);
   }
}


/**
 * Callback for _mesa_HashWalk.  Free all the program variants which match
 * the given context.
 */
static void
destroy_program_variants_cb(GLuint key, void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_program *program = (struct gl_program *) data;
   destroy_program_variants(st, program);
}


/**
 * Walk over all shaders and programs to delete any variants which
 * belong to the given context.
 * This is called during context tear-down.
 */
void
st_destroy_program_variants(struct st_context *st)
{
   /* If shaders can be shared with other contexts, the last context will
    * call DeleteProgram on all shaders, releasing everything.
    */
   if (st->has_shareable_shaders)
      return;

   /* ARB vert/frag program */
   _mesa_HashWalk(st->ctx->Shared->Programs,
                  destroy_program_variants_cb, st);

   /* GLSL vert/frag/geom shaders */
   _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
                  destroy_shader_program_variants_cb, st);
}


/**
 * Compile one shader variant.
 */
static void
st_precompile_shader_variant(struct st_context *st,
                             struct gl_program *prog)
{
   switch (prog->Target) {
   case GL_VERTEX_PROGRAM_ARB: {
      struct st_program *p = (struct st_program *)prog;
      struct st_common_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_vp_variant(st, p, &key);
      break;
   }

   case GL_FRAGMENT_PROGRAM_ARB: {
      struct st_program *p = (struct st_program *)prog;
      struct st_fp_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_fp_variant(st, p, &key);
      break;
   }

   case GL_TESS_CONTROL_PROGRAM_NV:
   case GL_TESS_EVALUATION_PROGRAM_NV:
   case GL_GEOMETRY_PROGRAM_NV:
   case GL_COMPUTE_PROGRAM_NV: {
      struct st_program *p = st_program(prog);
      struct st_common_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_common_variant(st, p, &key);
      break;
   }

   default:
      assert(0);
   }
}

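/**
 * Finalize a program after it has been linked or translated: mark the
 * affected state dirty if the program is currently bound, compact its
 * NIR with nir_sweep(), and optionally precompile a variant.
 */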
void
st_finalize_program(struct st_context *st, struct gl_program *prog)
{
   if (st->current_program[prog->info.stage] == prog) {
      if (prog->info.stage == MESA_SHADER_VERTEX)
         st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
      else
         st->dirty |= ((struct st_program *)prog)->affected_states;
   }

   if (prog->nir)
      nir_sweep(prog->nir);

   /* Create Gallium shaders now instead of on demand. */
   if ((ST_DEBUG & DEBUG_PRECOMPILE) ||
       st->shader_has_one_variant[prog->info.stage])
      st_precompile_shader_variant(st, prog);
}