src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35 #include "main/imports.h"
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44 #include "draw/draw_context.h"
  45
  46 #include "pipe/p_context.h"
  47 #include "pipe/p_defines.h"
  48 #include "pipe/p_shader_tokens.h"
  49 #include "draw/draw_context.h"
  50 #include "tgsi/tgsi_dump.h"
  51 #include "tgsi/tgsi_emulate.h"
  52 #include "tgsi/tgsi_parse.h"
  53 #include "tgsi/tgsi_ureg.h"
  54
  55 #include "st_debug.h"
  56 #include "st_cb_bitmap.h"
  57 #include "st_cb_drawpixels.h"
  58 #include "st_context.h"
  59 #include "st_tgsi_lower_depth_clamp.h"
  60 #include "st_tgsi_lower_yuv.h"
  61 #include "st_program.h"
  62 #include "st_mesa_to_tgsi.h"
  63 #include "st_atifs_to_tgsi.h"
  64 #include "st_nir.h"
  65 #include "st_shader_cache.h"
  66 #include "st_util.h"
  67 #include "cso_cache/cso_context.h"
  68
  69
  70
  71 static void
  72 set_affected_state_flags(uint64_t *states,
  73                          struct gl_program *prog,
  74                          uint64_t new_constants,
  75                          uint64_t new_sampler_views,
  76                          uint64_t new_samplers,
  77                          uint64_t new_images,
  78                          uint64_t new_ubos,
  79                          uint64_t new_ssbos,
  80                          uint64_t new_atomics)
  81 {
  82    if (prog->Parameters->NumParameters)
  83       *states |= new_constants;
  84
  85    if (prog->info.num_textures)
  86       *states |= new_sampler_views | new_samplers;
  87
  88    if (prog->info.num_images)
  89       *states |= new_images;
  90
  91    if (prog->info.num_ubos)
  92       *states |= new_ubos;
  93
  94    if (prog->info.num_ssbos)
  95       *states |= new_ssbos;
  96
  97    if (prog->info.num_abos)
  98       *states |= new_atomics;
  99 }
 100
 101 /**
 102  * This determines which states will be updated when the shader is bound.
 103  */
 104 void
 105 st_set_prog_affected_state_flags(struct gl_program *prog)
 106 {
 107    uint64_t *states;
 108
 109    switch (prog->info.stage) {
 110    case MESA_SHADER_VERTEX:
 111       states = &((struct st_program*)prog)->affected_states;
 112
 113       *states = ST_NEW_VS_STATE |
 114                 ST_NEW_RASTERIZER |
 115                 ST_NEW_VERTEX_ARRAYS;
 116
 117       set_affected_state_flags(states, prog,
 118                                ST_NEW_VS_CONSTANTS,
 119                                ST_NEW_VS_SAMPLER_VIEWS,
 120                                ST_NEW_VS_SAMPLERS,
 121                                ST_NEW_VS_IMAGES,
 122                                ST_NEW_VS_UBOS,
 123                                ST_NEW_VS_SSBOS,
 124                                ST_NEW_VS_ATOMICS);
 125       break;
 126
 127    case MESA_SHADER_TESS_CTRL:
 128       states = &(st_program(prog))->affected_states;
 129
 130       *states = ST_NEW_TCS_STATE;
 131
 132       set_affected_state_flags(states, prog,
 133                                ST_NEW_TCS_CONSTANTS,
 134                                ST_NEW_TCS_SAMPLER_VIEWS,
 135                                ST_NEW_TCS_SAMPLERS,
 136                                ST_NEW_TCS_IMAGES,
 137                                ST_NEW_TCS_UBOS,
 138                                ST_NEW_TCS_SSBOS,
 139                                ST_NEW_TCS_ATOMICS);
 140       break;
 141
 142    case MESA_SHADER_TESS_EVAL:
 143       states = &(st_program(prog))->affected_states;
 144
 145       *states = ST_NEW_TES_STATE |
 146                 ST_NEW_RASTERIZER;
 147
 148       set_affected_state_flags(states, prog,
 149                                ST_NEW_TES_CONSTANTS,
 150                                ST_NEW_TES_SAMPLER_VIEWS,
 151                                ST_NEW_TES_SAMPLERS,
 152                                ST_NEW_TES_IMAGES,
 153                                ST_NEW_TES_UBOS,
 154                                ST_NEW_TES_SSBOS,
 155                                ST_NEW_TES_ATOMICS);
 156       break;
 157
 158    case MESA_SHADER_GEOMETRY:
 159       states = &(st_program(prog))->affected_states;
 160
 161       *states = ST_NEW_GS_STATE |
 162                 ST_NEW_RASTERIZER;
 163
 164       set_affected_state_flags(states, prog,
 165                                ST_NEW_GS_CONSTANTS,
 166                                ST_NEW_GS_SAMPLER_VIEWS,
 167                                ST_NEW_GS_SAMPLERS,
 168                                ST_NEW_GS_IMAGES,
 169                                ST_NEW_GS_UBOS,
 170                                ST_NEW_GS_SSBOS,
 171                                ST_NEW_GS_ATOMICS);
 172       break;
 173
 174    case MESA_SHADER_FRAGMENT:
 175       states = &((struct st_program*)prog)->affected_states;
 176
 177       /* gl_FragCoord and glDrawPixels always use constants. */
 178       *states = ST_NEW_FS_STATE |
 179                 ST_NEW_SAMPLE_SHADING |
 180                 ST_NEW_FS_CONSTANTS;
 181
 182       set_affected_state_flags(states, prog,
 183                                ST_NEW_FS_CONSTANTS,
 184                                ST_NEW_FS_SAMPLER_VIEWS,
 185                                ST_NEW_FS_SAMPLERS,
 186                                ST_NEW_FS_IMAGES,
 187                                ST_NEW_FS_UBOS,
 188                                ST_NEW_FS_SSBOS,
 189                                ST_NEW_FS_ATOMICS);
 190       break;
 191
 192    case MESA_SHADER_COMPUTE:
 193       states = &((struct st_program*)prog)->affected_states;
 194
 195       *states = ST_NEW_CS_STATE;
 196
 197       set_affected_state_flags(states, prog,
 198                                ST_NEW_CS_CONSTANTS,
 199                                ST_NEW_CS_SAMPLER_VIEWS,
 200                                ST_NEW_CS_SAMPLERS,
 201                                ST_NEW_CS_IMAGES,
 202                                ST_NEW_CS_UBOS,
 203                                ST_NEW_CS_SSBOS,
 204                                ST_NEW_CS_ATOMICS);
 205       break;
 206
 207    default:
 208       unreachable("unhandled shader stage");
 209    }
 210 }
 211
 212
 213 /**
 214  * Delete a shader variant.  Note the caller must unlink the variant from
 215  * the linked list.
 216  */
 217 static void
 218 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
 219 {
 220    if (v->driver_shader) {
 221       if (st->has_shareable_shaders || v->st == st) {
 222          /* The shader's context matches the calling context, or we
 223           * don't care.
 224           */
 225          switch (target) {
 226          case GL_VERTEX_PROGRAM_ARB:
 227             cso_delete_vertex_shader(st->cso_context, v->driver_shader);
 228             break;
 229          case GL_TESS_CONTROL_PROGRAM_NV:
 230             cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
 231             break;
 232          case GL_TESS_EVALUATION_PROGRAM_NV:
 233             cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
 234             break;
 235          case GL_GEOMETRY_PROGRAM_NV:
 236             cso_delete_geometry_shader(st->cso_context, v->driver_shader);
 237             break;
 238          case GL_FRAGMENT_PROGRAM_ARB:
 239             cso_delete_fragment_shader(st->cso_context, v->driver_shader);
 240             break;
 241          case GL_COMPUTE_PROGRAM_NV:
 242             cso_delete_compute_shader(st->cso_context, v->driver_shader);
 243             break;
 244          default:
 245             unreachable("bad shader type in delete_basic_variant");
 246          }
 247       } else {
 248          /* We can't delete a shader with a context different from the one
 249           * that created it.  Add it to the creating context's zombie list.
 250           */
 251          enum pipe_shader_type type =
 252             pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
 253
 254          st_save_zombie_shader(v->st, type, v->driver_shader);
 255       }
 256    }
 257
 258    free(v);
 259 }
 260
 261
 262 /**
 263  * Free all basic program variants.
 264  */
 265 void
 266 st_release_variants(struct st_context *st, struct st_program *p)
 267 {
 268    struct st_variant *v;
 269
 270    for (v = p->variants; v; ) {
 271       struct st_variant *next = v->next;
 272       delete_variant(st, v, p->Base.Target);
 273       v = next;
 274    }
 275
 276    p->variants = NULL;
 277
 278    if (p->state.tokens) {
 279       ureg_free_tokens(p->state.tokens);
 280       p->state.tokens = NULL;
 281    }
 282
 283    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 284     * it has resulted in the driver taking ownership of the NIR.  Those
 285     * callers should be NULLing out the nir field in any pipe_shader_state
 286     * that might have this called in order to indicate that.
 287     *
 288     * GLSL IR and ARB programs will have set gl_program->nir to the same
 289     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 290     */
 291 }
 292
 293 void
 294 st_finalize_nir_before_variants(struct nir_shader *nir)
 295 {
 296    NIR_PASS_V(nir, nir_opt_access);
 297
 298    NIR_PASS_V(nir, nir_split_var_copies);
 299    NIR_PASS_V(nir, nir_lower_var_copies);
 300    if (nir->options->lower_all_io_to_temps ||
 301        nir->options->lower_all_io_to_elements ||
 302        nir->info.stage == MESA_SHADER_VERTEX ||
 303        nir->info.stage == MESA_SHADER_GEOMETRY) {
 304       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 305    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 306       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 307    }
 308
 309    st_nir_assign_vs_in_locations(nir);
 310 }
 311
 312 /**
 313  * Translate ARB (asm) program to NIR
 314  */
 315 static nir_shader *
 316 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 317                          gl_shader_stage stage)
 318 {
 319    struct pipe_screen *screen = st->pipe->screen;
 320    const struct gl_shader_compiler_options *options =
 321       &st->ctx->Const.ShaderCompilerOptions[stage];
 322
 323    /* Translate to NIR */
 324    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 325    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 326    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 327
 328    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
 329    NIR_PASS_V(nir, nir_lower_system_values);
 330
 331    /* Optimise NIR */
 332    NIR_PASS_V(nir, nir_opt_constant_folding);
 333    st_nir_opts(nir);
 334    st_finalize_nir_before_variants(nir);
 335
 336    if (st->allow_st_finalize_nir_twice)
 337       st_finalize_nir(st, prog, NULL, nir, true);
 338
 339    nir_validate_shader(nir, "after st/glsl finalize_nir");
 340
 341    return nir;
 342 }
 343
 344 void
 345 st_prepare_vertex_program(struct st_program *stp)
 346 {
 347    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 348
 349    stvp->num_inputs = 0;
 350    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 351    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 352
 353    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 354     * and TGSI generic input indexes, plus input attrib semantic info.
 355     */
 356    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 357       if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 358          stvp->input_to_index[attr] = stvp->num_inputs;
 359          stvp->index_to_input[stvp->num_inputs] = attr;
 360          stvp->num_inputs++;
 361
 362          if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 363             /* add placeholder for second part of a double attribute */
 364             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 365             stvp->num_inputs++;
 366          }
 367       }
 368    }
 369    /* pre-setup potentially unused edgeflag input */
 370    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 371    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 372
 373    /* Compute mapping of vertex program outputs to slots. */
 374    unsigned num_outputs = 0;
 375    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 376       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 377          stvp->result_to_output[attr] = num_outputs++;
 378    }
 379    /* pre-setup potentially unused edgeflag output */
 380    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 381 }
 382
 383 void
 384 st_translate_stream_output_info(struct gl_program *prog)
 385 {
 386    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 387    if (!info)
 388       return;
 389
 390    /* Determine the (default) output register mapping for each output. */
 391    unsigned num_outputs = 0;
 392    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 393    memset(output_mapping, 0, sizeof(output_mapping));
 394
 395    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 396       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 397          output_mapping[attr] = num_outputs++;
 398    }
 399
 400    /* Translate stream output info. */
 401    struct pipe_stream_output_info *so_info =
 402       &((struct st_program*)prog)->state.stream_output;
 403
 404    for (unsigned i = 0; i < info->NumOutputs; i++) {
 405       so_info->output[i].register_index =
 406          output_mapping[info->Outputs[i].OutputRegister];
 407       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 408       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 409       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 410       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 411       so_info->output[i].stream = info->Outputs[i].StreamId;
 412    }
 413
 414    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 415       so_info->stride[i] = info->Buffers[i].Stride;
 416    }
 417    so_info->num_outputs = info->NumOutputs;
 418 }
 419
 420 /**
 421  * Translate a vertex program.
 422  */
 423 bool
 424 st_translate_vertex_program(struct st_context *st,
 425                             struct st_program *stp)
 426 {
 427    struct ureg_program *ureg;
 428    enum pipe_error error;
 429    unsigned num_outputs = 0;
 430    unsigned attr;
 431    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 432    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 433
 434    if (stp->Base.arb.IsPositionInvariant)
 435       _mesa_insert_mvp_code(st->ctx, &stp->Base);
 436
 437    st_prepare_vertex_program(stp);
 438
 439    /* ARB_vp: */
 440    if (!stp->glsl_to_tgsi) {
 441       _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);
 442
 443       /* This determines which states will be updated when the assembly
 444        * shader is bound.
 445        */
 446       stp->affected_states = ST_NEW_VS_STATE |
 447                               ST_NEW_RASTERIZER |
 448                               ST_NEW_VERTEX_ARRAYS;
 449
 450       if (stp->Base.Parameters->NumParameters)
 451          stp->affected_states |= ST_NEW_VS_CONSTANTS;
 452
 453       /* Translate to NIR if preferred. */
 454       if (st->pipe->screen->get_shader_param(st->pipe->screen,
 455                                              PIPE_SHADER_VERTEX,
 456                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 457          assert(!stp->glsl_to_tgsi);
 458
 459          if (stp->Base.nir)
 460             ralloc_free(stp->Base.nir);
 461
 462          stp->state.type = PIPE_SHADER_IR_NIR;
 463          stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
 464                                                   MESA_SHADER_VERTEX);
 465          return true;
 466       }
 467    }
 468
 469    /* Get semantic names and indices. */
 470    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 471       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 472          unsigned slot = num_outputs++;
 473          unsigned semantic_name, semantic_index;
 474          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 475                                       &semantic_name, &semantic_index);
 476          output_semantic_name[slot] = semantic_name;
 477          output_semantic_index[slot] = semantic_index;
 478       }
 479    }
 480    /* pre-setup potentially unused edgeflag output */
 481    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 482    output_semantic_index[num_outputs] = 0;
 483
 484    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 485    if (ureg == NULL)
 486       return false;
 487
 488    if (stp->Base.info.clip_distance_array_size)
 489       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 490                     stp->Base.info.clip_distance_array_size);
 491    if (stp->Base.info.cull_distance_array_size)
 492       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 493                     stp->Base.info.cull_distance_array_size);
 494
 495    if (ST_DEBUG & DEBUG_MESA) {
 496       _mesa_print_program(&stp->Base);
 497       _mesa_print_program_parameters(st->ctx, &stp->Base);
 498       debug_printf("\n");
 499    }
 500
 501    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 502
 503    if (stp->glsl_to_tgsi) {
 504       error = st_translate_program(st->ctx,
 505                                    PIPE_SHADER_VERTEX,
 506                                    ureg,
 507                                    stp->glsl_to_tgsi,
 508                                    &stp->Base,
 509                                    /* inputs */
 510                                    stvp->num_inputs,
 511                                    stvp->input_to_index,
 512                                    NULL, /* inputSlotToAttr */
 513                                    NULL, /* input semantic name */
 514                                    NULL, /* input semantic index */
 515                                    NULL, /* interp mode */
 516                                    /* outputs */
 517                                    num_outputs,
 518                                    stvp->result_to_output,
 519                                    output_semantic_name,
 520                                    output_semantic_index);
 521
 522       st_translate_stream_output_info(&stp->Base);
 523
 524       free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
 525    } else
 526       error = st_translate_mesa_program(st->ctx,
 527                                         PIPE_SHADER_VERTEX,
 528                                         ureg,
 529                                         &stp->Base,
 530                                         /* inputs */
 531                                         stvp->num_inputs,
 532                                         stvp->input_to_index,
 533                                         NULL, /* input semantic name */
 534                                         NULL, /* input semantic index */
 535                                         NULL,
 536                                         /* outputs */
 537                                         num_outputs,
 538                                         stvp->result_to_output,
 539                                         output_semantic_name,
 540                                         output_semantic_index);
 541
 542    if (error) {
 543       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 544       _mesa_print_program(&stp->Base);
 545       debug_assert(0);
 546       return false;
 547    }
 548
 549    stp->state.tokens = ureg_get_tokens(ureg, NULL);
 550    ureg_destroy(ureg);
 551
 552    if (stp->glsl_to_tgsi) {
 553       stp->glsl_to_tgsi = NULL;
 554       st_store_ir_in_disk_cache(st, &stp->Base, false);
 555    }
 556
 557    return stp->state.tokens != NULL;
 558 }
 559
/* State key naming STATE_DEPTH_RANGE; the remaining tokens are zero.
 * st_create_vp_variant() passes it to _mesa_add_state_reference() when the
 * variant key asks for depth clamp lowering.
 */
static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };
 562
 563 static struct st_common_variant *
 564 st_create_vp_variant(struct st_context *st,
 565                      struct st_program *stvp,
 566                      const struct st_common_variant_key *key)
 567 {
 568    struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
 569    struct pipe_context *pipe = st->pipe;
 570    struct pipe_screen *screen = pipe->screen;
 571    struct pipe_shader_state state = {0};
 572
 573    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 574       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 575    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 576
 577    vpv->key = *key;
 578
 579    state.stream_output = stvp->state.stream_output;
 580
 581    if (stvp->state.type == PIPE_SHADER_IR_NIR) {
 582       bool finalize = false;
 583
 584       state.type = PIPE_SHADER_IR_NIR;
 585       state.ir.nir = nir_shader_clone(NULL, stvp->Base.nir);
 586       if (key->clamp_color) {
 587          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 588          finalize = true;
 589       }
 590       if (key->passthrough_edgeflags) {
 591          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 592          finalize = true;
 593       }
 594
 595       if (key->lower_point_size) {
 596          _mesa_add_state_reference(params, point_size_state);
 597          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 598                     point_size_state);
 599          finalize = true;
 600       }
 601
 602       if (key->lower_ucp) {
 603          bool can_compact = screen->get_param(screen,
 604                                               PIPE_CAP_NIR_COMPACT_ARRAYS);
 605
 606          bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 607          gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 608          for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 609             if (use_eye) {
 610                clipplane_state[i][0] = STATE_CLIPPLANE;
 611                clipplane_state[i][1] = i;
 612             } else {
 613                clipplane_state[i][0] = STATE_INTERNAL;
 614                clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 615                clipplane_state[i][2] = i;
 616             }
 617             _mesa_add_state_reference(params, clipplane_state[i]);
 618          }
 619
 620          NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
 621                     true, can_compact, clipplane_state);
 622          NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
 623                     nir_shader_get_entrypoint(state.ir.nir), true, false);
 624          NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
 625          finalize = true;
 626       }
 627
 628       if (finalize || !st->allow_st_finalize_nir_twice) {
 629          st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
 630                          true);
 631
 632          /* Some of the lowering above may have introduced new varyings */
 633          nir_shader_gather_info(state.ir.nir,
 634                                 nir_shader_get_entrypoint(state.ir.nir));
 635       }
 636
 637       if (ST_DEBUG & DEBUG_PRINT_IR)
 638          nir_print_shader(state.ir.nir, stderr);
 639
 640       if (key->is_draw_shader)
 641          vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 642       else
 643          vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 644
 645       return vpv;
 646    }
 647
 648    state.type = PIPE_SHADER_IR_TGSI;
 649    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 650
 651    /* Emulate features. */
 652    if (key->clamp_color || key->passthrough_edgeflags) {
 653       const struct tgsi_token *tokens;
 654       unsigned flags =
 655          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 656          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 657
 658       tokens = tgsi_emulate(state.tokens, flags);
 659
 660       if (tokens) {
 661          tgsi_free_tokens(state.tokens);
 662          state.tokens = tokens;
 663       } else {
 664          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 665       }
 666    }
 667
 668    if (key->lower_depth_clamp) {
 669       unsigned depth_range_const =
 670             _mesa_add_state_reference(params, depth_range_state);
 671
 672       const struct tgsi_token *tokens;
 673       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 674                                          key->clip_negative_one_to_one);
 675       if (tokens != state.tokens)
 676          tgsi_free_tokens(state.tokens);
 677       state.tokens = tokens;
 678    }
 679
 680    if (ST_DEBUG & DEBUG_PRINT_IR)
 681       tgsi_dump(state.tokens, 0);
 682
 683    if (key->is_draw_shader)
 684       vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 685    else
 686       vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 687
 688    return vpv;
 689 }
 690
 691
 692 /**
 693  * Find/create a vertex program variant.
 694  */
 695 struct st_common_variant *
 696 st_get_vp_variant(struct st_context *st,
 697                   struct st_program *stp,
 698                   const struct st_common_variant_key *key)
 699 {
 700    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 701    struct st_common_variant *vpv;
 702
 703    /* Search for existing variant */
 704    for (vpv = st_common_variant(stp->variants); vpv;
 705         vpv = st_common_variant(vpv->base.next)) {
 706       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 707          break;
 708       }
 709    }
 710
 711    if (!vpv) {
 712       /* create now */
 713       vpv = st_create_vp_variant(st, stp, key);
 714       if (vpv) {
 715          vpv->base.st = key->st;
 716
 717          unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
 718          for (unsigned index = 0; index < num_inputs; ++index) {
 719             unsigned attr = stvp->index_to_input[index];
 720             if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 721                continue;
 722             vpv->vert_attrib_mask |= 1u << attr;
 723          }
 724
 725          /* insert into list */
 726          vpv->base.next = stp->variants;
 727          stp->variants = &vpv->base;
 728       }
 729    }
 730
 731    return vpv;
 732 }
 733
 734
 735 /**
 736  * Translate a Mesa fragment shader into a TGSI shader.
 737  */
 738 bool
 739 st_translate_fragment_program(struct st_context *st,
 740                               struct st_program *stfp)
 741 {
 742    /* Non-GLSL programs: */
 743    if (!stfp->glsl_to_tgsi) {
 744       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 745       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 746          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 747
 748       /* This determines which states will be updated when the assembly
 749        * shader is bound.
 750        *
 751        * fragment.position and glDrawPixels always use constants.
 752        */
 753       stfp->affected_states = ST_NEW_FS_STATE |
 754                               ST_NEW_SAMPLE_SHADING |
 755                               ST_NEW_FS_CONSTANTS;
 756
 757       if (stfp->ati_fs) {
 758          /* Just set them for ATI_fs unconditionally. */
 759          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 760                                   ST_NEW_FS_SAMPLERS;
 761       } else {
 762          /* ARB_fp */
 763          if (stfp->Base.SamplersUsed)
 764             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 765                                      ST_NEW_FS_SAMPLERS;
 766       }
 767
 768       /* Translate to NIR. */
 769       if (!stfp->ati_fs &&
 770           st->pipe->screen->get_shader_param(st->pipe->screen,
 771                                              PIPE_SHADER_FRAGMENT,
 772                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 773          nir_shader *nir =
 774             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 775
 776          if (stfp->Base.nir)
 777             ralloc_free(stfp->Base.nir);
 778          stfp->state.type = PIPE_SHADER_IR_NIR;
 779          stfp->Base.nir = nir;
 780          return true;
 781       }
 782    }
 783
 784    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 785    ubyte inputMapping[VARYING_SLOT_MAX];
 786    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 787    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 788    GLuint attr;
 789    GLbitfield64 inputsRead;
 790    struct ureg_program *ureg;
 791
 792    GLboolean write_all = GL_FALSE;
 793
 794    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 795    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 796    uint fs_num_inputs = 0;
 797
 798    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 799    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 800    uint fs_num_outputs = 0;
 801
 802    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 803
 804    /*
 805     * Convert Mesa program inputs to TGSI input register semantics.
 806     */
 807    inputsRead = stfp->Base.info.inputs_read;
 808    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 809       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 810          const GLuint slot = fs_num_inputs++;
 811
 812          inputMapping[attr] = slot;
 813          inputSlotToAttr[slot] = attr;
 814
 815          switch (attr) {
 816          case VARYING_SLOT_POS:
 817             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 818             input_semantic_index[slot] = 0;
 819             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 820             break;
 821          case VARYING_SLOT_COL0:
 822             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 823             input_semantic_index[slot] = 0;
 824             interpMode[slot] = stfp->glsl_to_tgsi ?
 825                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 826             break;
 827          case VARYING_SLOT_COL1:
 828             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 829             input_semantic_index[slot] = 1;
 830             interpMode[slot] = stfp->glsl_to_tgsi ?
 831                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 832             break;
 833          case VARYING_SLOT_FOGC:
 834             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 835             input_semantic_index[slot] = 0;
 836             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 837             break;
 838          case VARYING_SLOT_FACE:
 839             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 840             input_semantic_index[slot] = 0;
 841             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 842             break;
 843          case VARYING_SLOT_PRIMITIVE_ID:
 844             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 845             input_semantic_index[slot] = 0;
 846             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 847             break;
 848          case VARYING_SLOT_LAYER:
 849             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 850             input_semantic_index[slot] = 0;
 851             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 852             break;
 853          case VARYING_SLOT_VIEWPORT:
 854             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 855             input_semantic_index[slot] = 0;
 856             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 857             break;
 858          case VARYING_SLOT_CLIP_DIST0:
 859             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 860             input_semantic_index[slot] = 0;
 861             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 862             break;
 863          case VARYING_SLOT_CLIP_DIST1:
 864             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 865             input_semantic_index[slot] = 1;
 866             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 867             break;
 868          case VARYING_SLOT_CULL_DIST0:
 869          case VARYING_SLOT_CULL_DIST1:
 870             /* these should have been lowered by GLSL */
 871             assert(0);
 872             break;
 873             /* In most cases, there is nothing special about these
 874              * inputs, so adopt a convention to use the generic
 875              * semantic name and the mesa VARYING_SLOT_ number as the
 876              * index.
 877              *
 878              * All that is required is that the vertex shader labels
 879              * its own outputs similarly, and that the vertex shader
 880              * generates at least every output required by the
 881              * fragment shader plus fixed-function hardware (such as
 882              * BFC).
 883              *
 884              * However, some drivers may need us to identify the PNTC and TEXi
 885              * varyings if, for example, their capability to replace them with
 886              * sprite coordinates is limited.
 887              */
 888          case VARYING_SLOT_PNTC:
 889             if (st->needs_texcoord_semantic) {
 890                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
 891                input_semantic_index[slot] = 0;
 892                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 893                break;
 894             }
 895             /* fall through */
 896          case VARYING_SLOT_TEX0:
 897          case VARYING_SLOT_TEX1:
 898          case VARYING_SLOT_TEX2:
 899          case VARYING_SLOT_TEX3:
 900          case VARYING_SLOT_TEX4:
 901          case VARYING_SLOT_TEX5:
 902          case VARYING_SLOT_TEX6:
 903          case VARYING_SLOT_TEX7:
 904             if (st->needs_texcoord_semantic) {
 905                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
 906                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
 907                interpMode[slot] = stfp->glsl_to_tgsi ?
 908                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 909                break;
 910             }
 911             /* fall through */
 912          case VARYING_SLOT_VAR0:
 913          default:
 914             /* Semantic indices should be zero-based because drivers may choose
 915              * to assign a fixed slot determined by that index.
 916              * This is useful because ARB_separate_shader_objects uses location
 917              * qualifiers for linkage, and if the semantic index corresponds to
 918              * these locations, linkage passes in the driver become unnecessary.
 919              *
 920              * If needs_texcoord_semantic is true, no semantic indices will be
 921              * consumed for the TEXi varyings, and we can base the locations of
 922              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
 923              */
 924             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
 925                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
 926             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
 927             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
 928             if (attr == VARYING_SLOT_PNTC)
 929                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 930             else {
 931                interpMode[slot] = stfp->glsl_to_tgsi ?
 932                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 933             }
 934             break;
 935          }
 936       }
 937       else {
 938          inputMapping[attr] = -1;
 939       }
 940    }
 941
 942    /*
 943     * Semantics and mapping for outputs
 944     */
 945    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
 946
 947    /* if z is written, emit that first */
 948    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
 949       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
 950       fs_output_semantic_index[fs_num_outputs] = 0;
 951       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
 952       fs_num_outputs++;
 953       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
 954    }
 955
 956    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
 957       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
 958       fs_output_semantic_index[fs_num_outputs] = 0;
 959       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
 960       fs_num_outputs++;
 961       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
 962    }
 963
 964    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
 965       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
 966       fs_output_semantic_index[fs_num_outputs] = 0;
 967       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
 968       fs_num_outputs++;
 969       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
 970    }
 971
 972    /* handle remaining outputs (color) */
 973    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
 974       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
 975          stfp->Base.SecondaryOutputsWritten;
 976       const unsigned loc = attr % FRAG_RESULT_MAX;
 977
 978       if (written & BITFIELD64_BIT(loc)) {
 979          switch (loc) {
 980          case FRAG_RESULT_DEPTH:
 981          case FRAG_RESULT_STENCIL:
 982          case FRAG_RESULT_SAMPLE_MASK:
 983             /* handled above */
 984             assert(0);
 985             break;
 986          case FRAG_RESULT_COLOR:
 987             write_all = GL_TRUE; /* fallthrough */
 988          default: {
 989             int index;
 990             assert(loc == FRAG_RESULT_COLOR ||
 991                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
 992
 993             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
 994
 995             if (attr >= FRAG_RESULT_MAX) {
 996                /* Secondary color for dual source blending. */
 997                assert(index == 0);
 998                index++;
 999             }
1000
1001             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1002             fs_output_semantic_index[fs_num_outputs] = index;
1003             outputMapping[attr] = fs_num_outputs;
1004             break;
1005          }
1006          }
1007
1008          fs_num_outputs++;
1009       }
1010    }
1011
1012    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1013    if (ureg == NULL)
1014       return false;
1015
1016    if (ST_DEBUG & DEBUG_MESA) {
1017       _mesa_print_program(&stfp->Base);
1018       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1019       debug_printf("\n");
1020    }
1021    if (write_all == GL_TRUE)
1022       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1023
1024    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1025       switch (stfp->Base.info.fs.depth_layout) {
1026       case FRAG_DEPTH_LAYOUT_ANY:
1027          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1028                        TGSI_FS_DEPTH_LAYOUT_ANY);
1029          break;
1030       case FRAG_DEPTH_LAYOUT_GREATER:
1031          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1032                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1033          break;
1034       case FRAG_DEPTH_LAYOUT_LESS:
1035          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1036                        TGSI_FS_DEPTH_LAYOUT_LESS);
1037          break;
1038       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1039          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1040                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1041          break;
1042       default:
1043          assert(0);
1044       }
1045    }
1046
1047    if (stfp->glsl_to_tgsi) {
1048       st_translate_program(st->ctx,
1049                            PIPE_SHADER_FRAGMENT,
1050                            ureg,
1051                            stfp->glsl_to_tgsi,
1052                            &stfp->Base,
1053                            /* inputs */
1054                            fs_num_inputs,
1055                            inputMapping,
1056                            inputSlotToAttr,
1057                            input_semantic_name,
1058                            input_semantic_index,
1059                            interpMode,
1060                            /* outputs */
1061                            fs_num_outputs,
1062                            outputMapping,
1063                            fs_output_semantic_name,
1064                            fs_output_semantic_index);
1065
1066       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1067    } else if (stfp->ati_fs)
1068       st_translate_atifs_program(ureg,
1069                                  stfp->ati_fs,
1070                                  &stfp->Base,
1071                                  /* inputs */
1072                                  fs_num_inputs,
1073                                  inputMapping,
1074                                  input_semantic_name,
1075                                  input_semantic_index,
1076                                  interpMode,
1077                                  /* outputs */
1078                                  fs_num_outputs,
1079                                  outputMapping,
1080                                  fs_output_semantic_name,
1081                                  fs_output_semantic_index);
1082    else
1083       st_translate_mesa_program(st->ctx,
1084                                 PIPE_SHADER_FRAGMENT,
1085                                 ureg,
1086                                 &stfp->Base,
1087                                 /* inputs */
1088                                 fs_num_inputs,
1089                                 inputMapping,
1090                                 input_semantic_name,
1091                                 input_semantic_index,
1092                                 interpMode,
1093                                 /* outputs */
1094                                 fs_num_outputs,
1095                                 outputMapping,
1096                                 fs_output_semantic_name,
1097                                 fs_output_semantic_index);
1098
1099    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1100    ureg_destroy(ureg);
1101
1102    if (stfp->glsl_to_tgsi) {
1103       stfp->glsl_to_tgsi = NULL;
1104       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1105    }
1106
1107    return stfp->state.tokens != NULL;
1108 }
1109
1110 static struct st_fp_variant *
1111 st_create_fp_variant(struct st_context *st,
1112                      struct st_program *stfp,
1113                      const struct st_fp_variant_key *key)
1114 {
1115    struct pipe_context *pipe = st->pipe;
1116    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1117    struct pipe_shader_state state = {0};
1118    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1119    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1120       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1121    static const gl_state_index16 scale_state[STATE_LENGTH] =
1122       { STATE_INTERNAL, STATE_PT_SCALE };
1123    static const gl_state_index16 bias_state[STATE_LENGTH] =
1124       { STATE_INTERNAL, STATE_PT_BIAS };
1125    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1126       { STATE_INTERNAL, STATE_ALPHA_REF };
1127
1128    if (!variant)
1129       return NULL;
1130
1131    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1132       bool finalize = false;
1133
1134       state.type = PIPE_SHADER_IR_NIR;
1135       state.ir.nir = nir_shader_clone(NULL, stfp->Base.nir);
1136
1137       if (key->clamp_color) {
1138          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1139          finalize = true;
1140       }
1141
1142       if (key->lower_flatshade) {
1143          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1144          finalize = true;
1145       }
1146
1147       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1148          _mesa_add_state_reference(params, alpha_ref_state);
1149          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1150                     false, alpha_ref_state);
1151          finalize = true;
1152       }
1153
1154       if (key->lower_two_sided_color) {
1155          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
1156          finalize = true;
1157       }
1158
1159       if (key->persample_shading) {
1160           nir_shader *shader = state.ir.nir;
1161           nir_foreach_variable(var, &shader->inputs)
1162              var->data.sample = true;
1163           finalize = true;
1164       }
1165
1166       assert(!(key->bitmap && key->drawpixels));
1167
1168       /* glBitmap */
1169       if (key->bitmap) {
1170          nir_lower_bitmap_options options = {0};
1171
1172          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1173          options.sampler = variant->bitmap_sampler;
1174          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1175
1176          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1177          finalize = true;
1178       }
1179
1180       /* glDrawPixels (color only) */
1181       if (key->drawpixels) {
1182          nir_lower_drawpixels_options options = {{0}};
1183          unsigned samplers_used = stfp->Base.SamplersUsed;
1184
1185          /* Find the first unused slot. */
1186          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1187          options.drawpix_sampler = variant->drawpix_sampler;
1188          samplers_used |= (1 << variant->drawpix_sampler);
1189
1190          options.pixel_maps = key->pixelMaps;
1191          if (key->pixelMaps) {
1192             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1193             options.pixelmap_sampler = variant->pixelmap_sampler;
1194          }
1195
1196          options.scale_and_bias = key->scaleAndBias;
1197          if (key->scaleAndBias) {
1198             _mesa_add_state_reference(params, scale_state);
1199             memcpy(options.scale_state_tokens, scale_state,
1200                    sizeof(options.scale_state_tokens));
1201             _mesa_add_state_reference(params, bias_state);
1202             memcpy(options.bias_state_tokens, bias_state,
1203                    sizeof(options.bias_state_tokens));
1204          }
1205
1206          _mesa_add_state_reference(params, texcoord_state);
1207          memcpy(options.texcoord_state_tokens, texcoord_state,
1208                 sizeof(options.texcoord_state_tokens));
1209
1210          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1211          finalize = true;
1212       }
1213
1214       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1215                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1216                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1217          nir_lower_tex_options options = {0};
1218          options.lower_y_uv_external = key->external.lower_nv12;
1219          options.lower_y_u_v_external = key->external.lower_iyuv;
1220          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1221          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1222          options.lower_ayuv_external = key->external.lower_ayuv;
1223          options.lower_xyuv_external = key->external.lower_xyuv;
1224          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1225          finalize = true;
1226       }
1227
1228       if (finalize || !st->allow_st_finalize_nir_twice) {
1229          st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
1230                          false);
1231       }
1232
1233       /* This pass needs to happen *after* nir_lower_sampler */
1234       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1235                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1236          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1237                     ~stfp->Base.SamplersUsed,
1238                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1239                        key->external.lower_yx_xuxv,
1240                     key->external.lower_iyuv);
1241          finalize = true;
1242       }
1243
1244       if (finalize || !st->allow_st_finalize_nir_twice) {
1245          /* Some of the lowering above may have introduced new varyings */
1246          nir_shader_gather_info(state.ir.nir,
1247                                 nir_shader_get_entrypoint(state.ir.nir));
1248
1249          struct pipe_screen *screen = pipe->screen;
1250          if (screen->finalize_nir)
1251             screen->finalize_nir(screen, state.ir.nir, false);
1252       }
1253
1254       if (ST_DEBUG & DEBUG_PRINT_IR)
1255          nir_print_shader(state.ir.nir, stderr);
1256
1257       variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1258       variant->key = *key;
1259
1260       return variant;
1261    }
1262
1263    state.tokens = stfp->state.tokens;
1264
1265    assert(!(key->bitmap && key->drawpixels));
1266
1267    /* Fix texture targets and add fog for ATI_fs */
1268    if (stfp->ati_fs) {
1269       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1270
1271       if (tokens)
1272          state.tokens = tokens;
1273       else
1274          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1275    }
1276
1277    /* Emulate features. */
1278    if (key->clamp_color || key->persample_shading) {
1279       const struct tgsi_token *tokens;
1280       unsigned flags =
1281          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1282          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1283
1284       tokens = tgsi_emulate(state.tokens, flags);
1285
1286       if (tokens) {
1287          if (state.tokens != stfp->state.tokens)
1288             tgsi_free_tokens(state.tokens);
1289          state.tokens = tokens;
1290       } else
1291          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1292    }
1293
1294    /* glBitmap */
1295    if (key->bitmap) {
1296       const struct tgsi_token *tokens;
1297
1298       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1299
1300       tokens = st_get_bitmap_shader(state.tokens,
1301                                     st->internal_target,
1302                                     variant->bitmap_sampler,
1303                                     st->needs_texcoord_semantic,
1304                                     st->bitmap.tex_format ==
1305                                     PIPE_FORMAT_R8_UNORM);
1306
1307       if (tokens) {
1308          if (state.tokens != stfp->state.tokens)
1309             tgsi_free_tokens(state.tokens);
1310          state.tokens = tokens;
1311       } else
1312          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1313    }
1314
1315    /* glDrawPixels (color only) */
1316    if (key->drawpixels) {
1317       const struct tgsi_token *tokens;
1318       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1319
1320       /* Find the first unused slot. */
1321       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1322
1323       if (key->pixelMaps) {
1324          unsigned samplers_used = stfp->Base.SamplersUsed |
1325                                   (1 << variant->drawpix_sampler);
1326
1327          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1328       }
1329
1330       if (key->scaleAndBias) {
1331          scale_const = _mesa_add_state_reference(params, scale_state);
1332          bias_const = _mesa_add_state_reference(params, bias_state);
1333       }
1334
1335       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1336
1337       tokens = st_get_drawpix_shader(state.tokens,
1338                                      st->needs_texcoord_semantic,
1339                                      key->scaleAndBias, scale_const,
1340                                      bias_const, key->pixelMaps,
1341                                      variant->drawpix_sampler,
1342                                      variant->pixelmap_sampler,
1343                                      texcoord_const, st->internal_target);
1344
1345       if (tokens) {
1346          if (state.tokens != stfp->state.tokens)
1347             tgsi_free_tokens(state.tokens);
1348          state.tokens = tokens;
1349       } else
1350          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1351    }
1352
1353    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1354                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1355       const struct tgsi_token *tokens;
1356
1357       /* samplers inserted would conflict, but this should be unpossible: */
1358       assert(!(key->bitmap || key->drawpixels));
1359
1360       tokens = st_tgsi_lower_yuv(state.tokens,
1361                                  ~stfp->Base.SamplersUsed,
1362                                  key->external.lower_nv12 ||
1363                                     key->external.lower_xy_uxvx ||
1364                                     key->external.lower_yx_xuxv,
1365                                  key->external.lower_iyuv);
1366       if (tokens) {
1367          if (state.tokens != stfp->state.tokens)
1368             tgsi_free_tokens(state.tokens);
1369          state.tokens = tokens;
1370       } else {
1371          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1372       }
1373    }
1374
1375    if (key->lower_depth_clamp) {
1376       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1377
1378       const struct tgsi_token *tokens;
1379       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1380       if (state.tokens != stfp->state.tokens)
1381          tgsi_free_tokens(state.tokens);
1382       state.tokens = tokens;
1383    }
1384
1385    if (ST_DEBUG & DEBUG_PRINT_IR)
1386       tgsi_dump(state.tokens, 0);
1387
1388    /* fill in variant */
1389    variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1390    variant->key = *key;
1391
1392    if (state.tokens != stfp->state.tokens)
1393       tgsi_free_tokens(state.tokens);
1394    return variant;
1395 }
1396
1397 /**
1398  * Translate fragment program if needed.
1399  */
1400 struct st_fp_variant *
1401 st_get_fp_variant(struct st_context *st,
1402                   struct st_program *stfp,
1403                   const struct st_fp_variant_key *key)
1404 {
1405    struct st_fp_variant *fpv;
1406
1407    /* Search for existing variant */
1408    for (fpv = st_fp_variant(stfp->variants); fpv;
1409         fpv = st_fp_variant(fpv->base.next)) {
1410       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1411          break;
1412       }
1413    }
1414
1415    if (!fpv) {
1416       /* create new */
1417       fpv = st_create_fp_variant(st, stfp, key);
1418       if (fpv) {
1419          fpv->base.st = key->st;
1420
1421          if (key->bitmap || key->drawpixels) {
1422             /* Regular variants should always come before the
1423              * bitmap & drawpixels variants, (unless there
1424              * are no regular variants) so that
1425              * st_update_fp can take a fast path when
1426              * shader_has_one_variant is set.
1427              */
1428             if (!stfp->variants) {
1429                stfp->variants = &fpv->base;
1430             } else {
1431                /* insert into list after the first one */
1432                fpv->base.next = stfp->variants->next;
1433                stfp->variants->next = &fpv->base;
1434             }
1435          } else {
1436             /* insert into list */
1437             fpv->base.next = stfp->variants;
1438             stfp->variants = &fpv->base;
1439          }
1440       }
1441    }
1442
1443    return fpv;
1444 }
1445
1446 /**
1447  * Translate a program. This is common code for geometry and tessellation
1448  * shaders.
1449  */
1450 bool
1451 st_translate_common_program(struct st_context *st,
1452                             struct st_program *stp)
1453 {
1454    struct gl_program *prog = &stp->Base;
1455    enum pipe_shader_type stage =
1456       pipe_shader_type_from_mesa(stp->Base.info.stage);
1457    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1458
1459    if (ureg == NULL)
1460       return false;
1461
1462    switch (stage) {
1463    case PIPE_SHADER_TESS_CTRL:
1464       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1465                     stp->Base.info.tess.tcs_vertices_out);
1466       break;
1467
1468    case PIPE_SHADER_TESS_EVAL:
1469       if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
1470          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1471       else
1472          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1473                        stp->Base.info.tess.primitive_mode);
1474
1475       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1476       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1477                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1478       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1479                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1480
1481       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1482                     (stp->Base.info.tess.spacing + 1) % 3);
1483
1484       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1485                     !stp->Base.info.tess.ccw);
1486       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1487                     stp->Base.info.tess.point_mode);
1488       break;
1489
1490    case PIPE_SHADER_GEOMETRY:
1491       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1492                     stp->Base.info.gs.input_primitive);
1493       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1494                     stp->Base.info.gs.output_primitive);
1495       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1496                     stp->Base.info.gs.vertices_out);
1497       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1498                     stp->Base.info.gs.invocations);
1499       break;
1500
1501    default:
1502       break;
1503    }
1504
1505    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1506    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1507    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1508    GLuint attr;
1509
1510    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1511    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1512    uint num_inputs = 0;
1513
1514    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1515    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1516    uint num_outputs = 0;
1517
1518    GLint i;
1519
1520    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1521    memset(inputMapping, 0, sizeof(inputMapping));
1522    memset(outputMapping, 0, sizeof(outputMapping));
1523    memset(&stp->state, 0, sizeof(stp->state));
1524
1525    if (prog->info.clip_distance_array_size)
1526       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1527                     prog->info.clip_distance_array_size);
1528    if (prog->info.cull_distance_array_size)
1529       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1530                     prog->info.cull_distance_array_size);
1531
1532    /*
1533     * Convert Mesa program inputs to TGSI input register semantics.
1534     */
1535    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1536       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1537          continue;
1538
1539       unsigned slot = num_inputs++;
1540
1541       inputMapping[attr] = slot;
1542       inputSlotToAttr[slot] = attr;
1543
1544       unsigned semantic_name, semantic_index;
1545       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1546                                    &semantic_name, &semantic_index);
1547       input_semantic_name[slot] = semantic_name;
1548       input_semantic_index[slot] = semantic_index;
1549    }
1550
1551    /* Also add patch inputs. */
1552    for (attr = 0; attr < 32; attr++) {
1553       if (prog->info.patch_inputs_read & (1u << attr)) {
1554          GLuint slot = num_inputs++;
1555          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1556
1557          inputMapping[patch_attr] = slot;
1558          inputSlotToAttr[slot] = patch_attr;
1559          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1560          input_semantic_index[slot] = attr;
1561       }
1562    }
1563
1564    /* initialize output semantics to defaults */
1565    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1566       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1567       output_semantic_index[i] = 0;
1568    }
1569
1570    /*
1571     * Determine number of outputs, the (default) output register
1572     * mapping and the semantic information for each output.
1573     */
1574    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1575       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1576          GLuint slot = num_outputs++;
1577
1578          outputMapping[attr] = slot;
1579
1580          unsigned semantic_name, semantic_index;
1581          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1582                                       &semantic_name, &semantic_index);
1583          output_semantic_name[slot] = semantic_name;
1584          output_semantic_index[slot] = semantic_index;
1585       }
1586    }
1587
1588    /* Also add patch outputs. */
1589    for (attr = 0; attr < 32; attr++) {
1590       if (prog->info.patch_outputs_written & (1u << attr)) {
1591          GLuint slot = num_outputs++;
1592          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1593
1594          outputMapping[patch_attr] = slot;
1595          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1596          output_semantic_index[slot] = attr;
1597       }
1598    }
1599
1600    st_translate_program(st->ctx,
1601                         stage,
1602                         ureg,
1603                         stp->glsl_to_tgsi,
1604                         prog,
1605                         /* inputs */
1606                         num_inputs,
1607                         inputMapping,
1608                         inputSlotToAttr,
1609                         input_semantic_name,
1610                         input_semantic_index,
1611                         NULL,
1612                         /* outputs */
1613                         num_outputs,
1614                         outputMapping,
1615                         output_semantic_name,
1616                         output_semantic_index);
1617
1618    stp->state.tokens = ureg_get_tokens(ureg, NULL);
1619
1620    ureg_destroy(ureg);
1621
1622    st_translate_stream_output_info(prog);
1623
1624    st_store_ir_in_disk_cache(st, prog, false);
1625
1626    if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1627       _mesa_print_program(prog);
1628
1629    free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1630    stp->glsl_to_tgsi = NULL;
1631    return true;
1632 }
1633
1634
1635 /**
1636  * Get/create a basic program variant.
1637  */
1638 struct st_variant *
1639 st_get_common_variant(struct st_context *st,
1640                       struct st_program *prog,
1641                       const struct st_common_variant_key *key)
1642 {
1643    struct pipe_context *pipe = st->pipe;
1644    struct st_variant *v;
1645    struct pipe_shader_state state = {0};
1646
1647    /* Search for existing variant */
1648    for (v = prog->variants; v; v = v->next) {
1649       if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
1650          break;
1651    }
1652
1653    if (!v) {
1654       /* create new */
1655       v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
1656       if (v) {
1657          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1658             bool finalize = false;
1659
1660             state.type = PIPE_SHADER_IR_NIR;
1661             state.ir.nir = nir_shader_clone(NULL, prog->Base.nir);
1662
1663             if (key->clamp_color) {
1664                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1665                finalize = true;
1666             }
1667
1668             state.stream_output = prog->state.stream_output;
1669
1670             if (finalize || !st->allow_st_finalize_nir_twice) {
1671                st_finalize_nir(st, &prog->Base, prog->shader_program,
1672                                state.ir.nir, true);
1673             }
1674
1675             if (ST_DEBUG & DEBUG_PRINT_IR)
1676                nir_print_shader(state.ir.nir, stderr);
1677          } else {
1678             if (key->lower_depth_clamp) {
1679                struct gl_program_parameter_list *params = prog->Base.Parameters;
1680
1681                unsigned depth_range_const =
1682                      _mesa_add_state_reference(params, depth_range_state);
1683
1684                const struct tgsi_token *tokens;
1685                tokens =
1686                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1687                                                depth_range_const,
1688                                                key->clip_negative_one_to_one);
1689
1690                if (tokens != prog->state.tokens)
1691                   tgsi_free_tokens(prog->state.tokens);
1692
1693                prog->state.tokens = tokens;
1694             }
1695             state = prog->state;
1696
1697             if (ST_DEBUG & DEBUG_PRINT_IR)
1698                tgsi_dump(state.tokens, 0);
1699          }
1700          /* fill in new variant */
1701          switch (prog->Base.info.stage) {
1702          case MESA_SHADER_TESS_CTRL:
1703             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1704             break;
1705          case MESA_SHADER_TESS_EVAL:
1706             v->driver_shader = pipe->create_tes_state(pipe, &state);
1707             break;
1708          case MESA_SHADER_GEOMETRY:
1709             v->driver_shader = pipe->create_gs_state(pipe, &state);
1710             break;
1711          case MESA_SHADER_COMPUTE: {
1712             struct pipe_compute_state cs = {0};
1713             cs.ir_type = state.type;
1714             cs.req_local_mem = prog->Base.info.cs.shared_size;
1715
1716             if (state.type == PIPE_SHADER_IR_NIR)
1717                cs.prog = state.ir.nir;
1718             else
1719                cs.prog = state.tokens;
1720
1721             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1722             break;
1723          }
1724          default:
1725             assert(!"unhandled shader type");
1726             free(v);
1727             return NULL;
1728          }
1729
1730          st_common_variant(v)->key = *key;
1731          v->st = key->st;
1732
1733          /* insert into list */
1734          v->next = prog->variants;
1735          prog->variants = v;
1736       }
1737    }
1738
1739    return v;
1740 }
1741
1742
1743 /**
1744  * Vert/Geom/Frag programs have per-context variants.  Free all the
1745  * variants attached to the given program which match the given context.
1746  */
1747 static void
1748 destroy_program_variants(struct st_context *st, struct gl_program *target)
1749 {
1750    if (!target || target == &_mesa_DummyProgram)
1751       return;
1752
1753    struct st_program *p = st_program(target);
1754    struct st_variant *v, **prevPtr = &p->variants;
1755
1756    for (v = p->variants; v; ) {
1757       struct st_variant *next = v->next;
1758       if (v->st == st) {
1759          /* unlink from list */
1760          *prevPtr = next;
1761          /* destroy this variant */
1762          delete_variant(st, v, target->Target);
1763       }
1764       else {
1765          prevPtr = &v->next;
1766       }
1767       v = next;
1768    }
1769 }
1770
1771
1772 /**
1773  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1774  * which match the given context.
1775  */
1776 static void
1777 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1778 {
1779    struct st_context *st = (struct st_context *) userData;
1780    struct gl_shader *shader = (struct gl_shader *) data;
1781
1782    switch (shader->Type) {
1783    case GL_SHADER_PROGRAM_MESA:
1784       {
1785          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1786          GLuint i;
1787
1788          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1789             if (shProg->_LinkedShaders[i])
1790                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1791          }
1792       }
1793       break;
1794    case GL_VERTEX_SHADER:
1795    case GL_FRAGMENT_SHADER:
1796    case GL_GEOMETRY_SHADER:
1797    case GL_TESS_CONTROL_SHADER:
1798    case GL_TESS_EVALUATION_SHADER:
1799    case GL_COMPUTE_SHADER:
1800       break;
1801    default:
1802       assert(0);
1803    }
1804 }
1805
1806
1807 /**
1808  * Callback for _mesa_HashWalk.  Free all the program variants which match
1809  * the given context.
1810  */
1811 static void
1812 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1813 {
1814    struct st_context *st = (struct st_context *) userData;
1815    struct gl_program *program = (struct gl_program *) data;
1816    destroy_program_variants(st, program);
1817 }
1818
1819
1820 /**
1821  * Walk over all shaders and programs to delete any variants which
1822  * belong to the given context.
1823  * This is called during context tear-down.
1824  */
1825 void
1826 st_destroy_program_variants(struct st_context *st)
1827 {
1828    /* If shaders can be shared with other contexts, the last context will
1829     * call DeleteProgram on all shaders, releasing everything.
1830     */
1831    if (st->has_shareable_shaders)
1832       return;
1833
1834    /* ARB vert/frag program */
1835    _mesa_HashWalk(st->ctx->Shared->Programs,
1836                   destroy_program_variants_cb, st);
1837
1838    /* GLSL vert/frag/geom shaders */
1839    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1840                   destroy_shader_program_variants_cb, st);
1841 }
1842
1843
1844 /**
1845  * Compile one shader variant.
1846  */
1847 static void
1848 st_precompile_shader_variant(struct st_context *st,
1849                              struct gl_program *prog)
1850 {
1851    switch (prog->Target) {
1852    case GL_VERTEX_PROGRAM_ARB: {
1853       struct st_program *p = (struct st_program *)prog;
1854       struct st_common_variant_key key;
1855
1856       memset(&key, 0, sizeof(key));
1857
1858       key.st = st->has_shareable_shaders ? NULL : st;
1859       st_get_vp_variant(st, p, &key);
1860       break;
1861    }
1862
1863    case GL_FRAGMENT_PROGRAM_ARB: {
1864       struct st_program *p = (struct st_program *)prog;
1865       struct st_fp_variant_key key;
1866
1867       memset(&key, 0, sizeof(key));
1868
1869       key.st = st->has_shareable_shaders ? NULL : st;
1870       st_get_fp_variant(st, p, &key);
1871       break;
1872    }
1873
1874    case GL_TESS_CONTROL_PROGRAM_NV:
1875    case GL_TESS_EVALUATION_PROGRAM_NV:
1876    case GL_GEOMETRY_PROGRAM_NV:
1877    case GL_COMPUTE_PROGRAM_NV: {
1878       struct st_program *p = st_program(prog);
1879       struct st_common_variant_key key;
1880
1881       memset(&key, 0, sizeof(key));
1882
1883       key.st = st->has_shareable_shaders ? NULL : st;
1884       st_get_common_variant(st, p, &key);
1885       break;
1886    }
1887
1888    default:
1889       assert(0);
1890    }
1891 }
1892
1893 void
1894 st_finalize_program(struct st_context *st, struct gl_program *prog)
1895 {
1896    if (st->current_program[prog->info.stage] == prog) {
1897       if (prog->info.stage == MESA_SHADER_VERTEX)
1898          st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
1899       else
1900          st->dirty |= ((struct st_program *)prog)->affected_states;
1901    }
1902
1903    if (prog->nir)
1904       nir_sweep(prog->nir);
1905
1906    /* Create Gallium shaders now instead of on demand. */
1907    if (ST_DEBUG & DEBUG_PRECOMPILE ||
1908        st->shader_has_one_variant[prog->info.stage])
1909       st_precompile_shader_variant(st, prog);
1910 }