st/mesa: use a separate VS variant for the draw module
[mesa.git] / src / mesa / state_tracker / st_program.c
1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 * Brian Paul
31 */
32
33
34 #include "main/errors.h"
35 #include "main/imports.h"
36 #include "main/hash.h"
37 #include "main/mtypes.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_print.h"
40 #include "program/prog_to_nir.h"
41 #include "program/programopt.h"
42
43 #include "compiler/nir/nir.h"
44 #include "draw/draw_context.h"
45
46 #include "pipe/p_context.h"
47 #include "pipe/p_defines.h"
48 #include "pipe/p_shader_tokens.h"
49 #include "draw/draw_context.h"
50 #include "tgsi/tgsi_dump.h"
51 #include "tgsi/tgsi_emulate.h"
52 #include "tgsi/tgsi_parse.h"
53 #include "tgsi/tgsi_ureg.h"
54
55 #include "st_debug.h"
56 #include "st_cb_bitmap.h"
57 #include "st_cb_drawpixels.h"
58 #include "st_context.h"
59 #include "st_tgsi_lower_depth_clamp.h"
60 #include "st_tgsi_lower_yuv.h"
61 #include "st_program.h"
62 #include "st_mesa_to_tgsi.h"
63 #include "st_atifs_to_tgsi.h"
64 #include "st_nir.h"
65 #include "st_shader_cache.h"
66 #include "st_util.h"
67 #include "cso_cache/cso_context.h"
68
69
70
71 static void
72 set_affected_state_flags(uint64_t *states,
73 struct gl_program *prog,
74 uint64_t new_constants,
75 uint64_t new_sampler_views,
76 uint64_t new_samplers,
77 uint64_t new_images,
78 uint64_t new_ubos,
79 uint64_t new_ssbos,
80 uint64_t new_atomics)
81 {
82 if (prog->Parameters->NumParameters)
83 *states |= new_constants;
84
85 if (prog->info.num_textures)
86 *states |= new_sampler_views | new_samplers;
87
88 if (prog->info.num_images)
89 *states |= new_images;
90
91 if (prog->info.num_ubos)
92 *states |= new_ubos;
93
94 if (prog->info.num_ssbos)
95 *states |= new_ssbos;
96
97 if (prog->info.num_abos)
98 *states |= new_atomics;
99 }
100
101 /**
102 * This determines which states will be updated when the shader is bound.
103 */
104 void
105 st_set_prog_affected_state_flags(struct gl_program *prog)
106 {
107 uint64_t *states;
108
109 switch (prog->info.stage) {
110 case MESA_SHADER_VERTEX:
111 states = &((struct st_program*)prog)->affected_states;
112
113 *states = ST_NEW_VS_STATE |
114 ST_NEW_RASTERIZER |
115 ST_NEW_VERTEX_ARRAYS;
116
117 set_affected_state_flags(states, prog,
118 ST_NEW_VS_CONSTANTS,
119 ST_NEW_VS_SAMPLER_VIEWS,
120 ST_NEW_VS_SAMPLERS,
121 ST_NEW_VS_IMAGES,
122 ST_NEW_VS_UBOS,
123 ST_NEW_VS_SSBOS,
124 ST_NEW_VS_ATOMICS);
125 break;
126
127 case MESA_SHADER_TESS_CTRL:
128 states = &(st_program(prog))->affected_states;
129
130 *states = ST_NEW_TCS_STATE;
131
132 set_affected_state_flags(states, prog,
133 ST_NEW_TCS_CONSTANTS,
134 ST_NEW_TCS_SAMPLER_VIEWS,
135 ST_NEW_TCS_SAMPLERS,
136 ST_NEW_TCS_IMAGES,
137 ST_NEW_TCS_UBOS,
138 ST_NEW_TCS_SSBOS,
139 ST_NEW_TCS_ATOMICS);
140 break;
141
142 case MESA_SHADER_TESS_EVAL:
143 states = &(st_program(prog))->affected_states;
144
145 *states = ST_NEW_TES_STATE |
146 ST_NEW_RASTERIZER;
147
148 set_affected_state_flags(states, prog,
149 ST_NEW_TES_CONSTANTS,
150 ST_NEW_TES_SAMPLER_VIEWS,
151 ST_NEW_TES_SAMPLERS,
152 ST_NEW_TES_IMAGES,
153 ST_NEW_TES_UBOS,
154 ST_NEW_TES_SSBOS,
155 ST_NEW_TES_ATOMICS);
156 break;
157
158 case MESA_SHADER_GEOMETRY:
159 states = &(st_program(prog))->affected_states;
160
161 *states = ST_NEW_GS_STATE |
162 ST_NEW_RASTERIZER;
163
164 set_affected_state_flags(states, prog,
165 ST_NEW_GS_CONSTANTS,
166 ST_NEW_GS_SAMPLER_VIEWS,
167 ST_NEW_GS_SAMPLERS,
168 ST_NEW_GS_IMAGES,
169 ST_NEW_GS_UBOS,
170 ST_NEW_GS_SSBOS,
171 ST_NEW_GS_ATOMICS);
172 break;
173
174 case MESA_SHADER_FRAGMENT:
175 states = &((struct st_program*)prog)->affected_states;
176
177 /* gl_FragCoord and glDrawPixels always use constants. */
178 *states = ST_NEW_FS_STATE |
179 ST_NEW_SAMPLE_SHADING |
180 ST_NEW_FS_CONSTANTS;
181
182 set_affected_state_flags(states, prog,
183 ST_NEW_FS_CONSTANTS,
184 ST_NEW_FS_SAMPLER_VIEWS,
185 ST_NEW_FS_SAMPLERS,
186 ST_NEW_FS_IMAGES,
187 ST_NEW_FS_UBOS,
188 ST_NEW_FS_SSBOS,
189 ST_NEW_FS_ATOMICS);
190 break;
191
192 case MESA_SHADER_COMPUTE:
193 states = &((struct st_program*)prog)->affected_states;
194
195 *states = ST_NEW_CS_STATE;
196
197 set_affected_state_flags(states, prog,
198 ST_NEW_CS_CONSTANTS,
199 ST_NEW_CS_SAMPLER_VIEWS,
200 ST_NEW_CS_SAMPLERS,
201 ST_NEW_CS_IMAGES,
202 ST_NEW_CS_UBOS,
203 ST_NEW_CS_SSBOS,
204 ST_NEW_CS_ATOMICS);
205 break;
206
207 default:
208 unreachable("unhandled shader stage");
209 }
210 }
211
212
213 /**
214 * Delete a shader variant. Note the caller must unlink the variant from
215 * the linked list.
216 */
217 static void
218 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
219 {
220 if (v->driver_shader) {
221 if (st->has_shareable_shaders || v->st == st) {
222 /* The shader's context matches the calling context, or we
223 * don't care.
224 */
225 switch (target) {
226 case GL_VERTEX_PROGRAM_ARB:
227 cso_delete_vertex_shader(st->cso_context, v->driver_shader);
228 break;
229 case GL_TESS_CONTROL_PROGRAM_NV:
230 cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
231 break;
232 case GL_TESS_EVALUATION_PROGRAM_NV:
233 cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
234 break;
235 case GL_GEOMETRY_PROGRAM_NV:
236 cso_delete_geometry_shader(st->cso_context, v->driver_shader);
237 break;
238 case GL_FRAGMENT_PROGRAM_ARB:
239 cso_delete_fragment_shader(st->cso_context, v->driver_shader);
240 break;
241 case GL_COMPUTE_PROGRAM_NV:
242 cso_delete_compute_shader(st->cso_context, v->driver_shader);
243 break;
244 default:
245 unreachable("bad shader type in delete_basic_variant");
246 }
247 } else {
248 /* We can't delete a shader with a context different from the one
249 * that created it. Add it to the creating context's zombie list.
250 */
251 enum pipe_shader_type type =
252 pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
253
254 st_save_zombie_shader(v->st, type, v->driver_shader);
255 }
256 }
257
258 free(v);
259 }
260
261
262 /**
263 * Free all basic program variants.
264 */
265 void
266 st_release_variants(struct st_context *st, struct st_program *p)
267 {
268 struct st_variant *v;
269
270 for (v = p->variants; v; ) {
271 struct st_variant *next = v->next;
272 delete_variant(st, v, p->Base.Target);
273 v = next;
274 }
275
276 p->variants = NULL;
277
278 if (p->state.tokens) {
279 ureg_free_tokens(p->state.tokens);
280 p->state.tokens = NULL;
281 }
282
283 /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
284 * it has resulted in the driver taking ownership of the NIR. Those
285 * callers should be NULLing out the nir field in any pipe_shader_state
286 * that might have this called in order to indicate that.
287 *
288 * GLSL IR and ARB programs will have set gl_program->nir to the same
289 * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
290 */
291 }
292
/**
 * Run the NIR lowering that must happen once per shader, before any
 * per-variant lowering/finalization is applied.
 *
 * Splits and lowers variable copies, lowers I/O arrays to elements where
 * required by the stage or the backend's compiler options, and assigns
 * vertex-shader input locations.
 */
void
st_finalize_nir_before_variants(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_opt_access);

   /* Copy lowering must precede the array-to-element I/O lowering below. */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   if (nir->options->lower_all_io_to_temps ||
       nir->options->lower_all_io_to_elements ||
       nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* NOTE(review): the boolean presumably restricts the lowering to one
       * side of the I/O (outputs only) for fragment shaders — confirm
       * against the pass's signature.
       */
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   st_nir_assign_vs_in_locations(nir);
}
311
/**
 * Translate ARB (asm) program to NIR
 *
 * Converts a Mesa IR program (ARB assembly) into an optimized NIR shader
 * using the per-stage compiler options, then runs the pre-variant
 * finalization.  Returns the new NIR shader; the caller takes ownership.
 */
static nir_shader *
st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
                         gl_shader_stage stage)
{
   struct pipe_screen *screen = st->pipe->screen;
   const struct gl_shader_compiler_options *options =
      &st->ctx->Const.ShaderCompilerOptions[stage];

   /* Translate to NIR */
   nir_shader *nir = prog_to_nir(prog, options->NirOptions);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
   nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");

   /* Window-position transform must run before system-value lowering. */
   NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS_V(nir, nir_lower_system_values);

   /* Optimise NIR */
   NIR_PASS_V(nir, nir_opt_constant_folding);
   st_nir_opts(nir);
   st_finalize_nir_before_variants(nir);

   /* If the driver allows finalizing twice, do the first (shared) pass now;
    * the per-variant path may finalize again later.
    */
   if (st->allow_st_finalize_nir_twice)
      st_finalize_nir(st, prog, NULL, nir, true);

   nir_validate_shader(nir, "after st/glsl finalize_nir");

   return nir;
}
343
344 void
345 st_prepare_vertex_program(struct st_program *stp)
346 {
347 struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
348
349 stvp->num_inputs = 0;
350 memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
351 memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
352
353 /* Determine number of inputs, the mappings between VERT_ATTRIB_x
354 * and TGSI generic input indexes, plus input attrib semantic info.
355 */
356 for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
357 if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
358 stvp->input_to_index[attr] = stvp->num_inputs;
359 stvp->index_to_input[stvp->num_inputs] = attr;
360 stvp->num_inputs++;
361
362 if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
363 /* add placeholder for second part of a double attribute */
364 stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
365 stvp->num_inputs++;
366 }
367 }
368 }
369 /* pre-setup potentially unused edgeflag input */
370 stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
371 stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
372
373 /* Compute mapping of vertex program outputs to slots. */
374 unsigned num_outputs = 0;
375 for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
376 if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
377 stvp->result_to_output[attr] = num_outputs++;
378 }
379 /* pre-setup potentially unused edgeflag output */
380 stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
381 }
382
383 void
384 st_translate_stream_output_info(struct gl_program *prog)
385 {
386 struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
387 if (!info)
388 return;
389
390 /* Determine the (default) output register mapping for each output. */
391 unsigned num_outputs = 0;
392 ubyte output_mapping[VARYING_SLOT_TESS_MAX];
393 memset(output_mapping, 0, sizeof(output_mapping));
394
395 for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
396 if (prog->info.outputs_written & BITFIELD64_BIT(attr))
397 output_mapping[attr] = num_outputs++;
398 }
399
400 /* Translate stream output info. */
401 struct pipe_stream_output_info *so_info =
402 &((struct st_program*)prog)->state.stream_output;
403
404 for (unsigned i = 0; i < info->NumOutputs; i++) {
405 so_info->output[i].register_index =
406 output_mapping[info->Outputs[i].OutputRegister];
407 so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
408 so_info->output[i].num_components = info->Outputs[i].NumComponents;
409 so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
410 so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
411 so_info->output[i].stream = info->Outputs[i].StreamId;
412 }
413
414 for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
415 so_info->stride[i] = info->Buffers[i].Stride;
416 }
417 so_info->num_outputs = info->NumOutputs;
418 }
419
/**
 * Translate a vertex program.
 *
 * Builds the TGSI token stream for the program (stored in stp->state.tokens)
 * and, if the driver prefers NIR, additionally translates the program to NIR
 * (stored in stp->Base.nir, with state.type set to PIPE_SHADER_IR_NIR).
 *
 * \return true on success, false if ureg creation or translation failed.
 */
bool
st_translate_vertex_program(struct st_context *st,
                            struct st_program *stp)
{
   struct ureg_program *ureg;
   enum pipe_error error;
   unsigned num_outputs = 0;
   unsigned attr;
   ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
   ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};

   /* Position-invariant ARB programs get the ftransform-equivalent MVP
    * code inserted up front.
    */
   if (stp->Base.arb.IsPositionInvariant)
      _mesa_insert_mvp_code(st->ctx, &stp->Base);

   /* Set up input/output slot mappings (input_to_index etc.). */
   st_prepare_vertex_program(stp);

   /* ARB_vp: */
   if (!stp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       */
      stp->affected_states = ST_NEW_VS_STATE |
                             ST_NEW_RASTERIZER |
                             ST_NEW_VERTEX_ARRAYS;

      if (stp->Base.Parameters->NumParameters)
         stp->affected_states |= ST_NEW_VS_CONSTANTS;

      /* No samplers are allowed in ARB_vp. */
   }

   /* Get semantic names and indices. */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
         unsigned slot = num_outputs++;
         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }
   /* pre-setup potentially unused edgeflag output */
   output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
   output_semantic_index[num_outputs] = 0;

   ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
   if (ureg == NULL)
      return false;

   /* Propagate clip/cull distance usage as TGSI properties. */
   if (stp->Base.info.clip_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
                    stp->Base.info.clip_distance_array_size);
   if (stp->Base.info.cull_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
                    stp->Base.info.cull_distance_array_size);

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stp->Base);
      _mesa_print_program_parameters(st->ctx, &stp->Base);
      debug_printf("\n");
   }

   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

   /* GLSL path vs. Mesa IR (ARB assembly) path. */
   if (stp->glsl_to_tgsi) {
      error = st_translate_program(st->ctx,
                                   PIPE_SHADER_VERTEX,
                                   ureg,
                                   stp->glsl_to_tgsi,
                                   &stp->Base,
                                   /* inputs */
                                   stvp->num_inputs,
                                   stvp->input_to_index,
                                   NULL, /* inputSlotToAttr */
                                   NULL, /* input semantic name */
                                   NULL, /* input semantic index */
                                   NULL, /* interp mode */
                                   /* outputs */
                                   num_outputs,
                                   stvp->result_to_output,
                                   output_semantic_name,
                                   output_semantic_index);

      st_translate_stream_output_info(&stp->Base);

      free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   } else
      error = st_translate_mesa_program(st->ctx,
                                        PIPE_SHADER_VERTEX,
                                        ureg,
                                        &stp->Base,
                                        /* inputs */
                                        stvp->num_inputs,
                                        stvp->input_to_index,
                                        NULL, /* input semantic name */
                                        NULL, /* input semantic index */
                                        NULL,
                                        /* outputs */
                                        num_outputs,
                                        stvp->result_to_output,
                                        output_semantic_name,
                                        output_semantic_index);

   if (error) {
      debug_printf("%s: failed to translate Mesa program:\n", __func__);
      _mesa_print_program(&stp->Base);
      debug_assert(0);
      return false;
   }

   /* Tokens are owned by stp and released in st_release_variants(). */
   stp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   if (stp->glsl_to_tgsi) {
      /* Visitor was freed above; clear the pointer and cache the IR. */
      stp->glsl_to_tgsi = NULL;
      st_store_ir_in_disk_cache(st, &stp->Base, false);
   }

   /* Translate to NIR.
    *
    * This must be done after the translation to TGSI is done, because
    * we'll pass the NIR shader to the driver and the TGSI version to
    * the draw module for the select/feedback/rasterpos code.
    */
   if (st->pipe->screen->get_shader_param(st->pipe->screen,
                                          PIPE_SHADER_VERTEX,
                                          PIPE_SHADER_CAP_PREFERRED_IR)) {
      assert(!stp->glsl_to_tgsi);

      nir_shader *nir =
         st_translate_prog_to_nir(st, &stp->Base, MESA_SHADER_VERTEX);

      /* Replace any previous NIR translation. */
      if (stp->Base.nir)
         ralloc_free(stp->Base.nir);
      stp->state.type = PIPE_SHADER_IR_NIR;
      stp->Base.nir = nir;
      return true;
   }

   return stp->state.tokens != NULL;
}
567
/* State reference for gl_DepthRange, used when lowering depth clamp in
 * st_create_vp_variant() to locate the uploaded depth-range constant.
 */
static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };
570
/**
 * Create a vertex program variant for the given key: clone the program's IR
 * (NIR or TGSI), apply the key-driven lowerings (clamp color, edgeflag
 * passthrough, point size, user clip planes / depth clamp), and create the
 * driver or draw-module shader.
 *
 * Returns the new variant; the caller inserts it into the variant list.
 */
static struct st_vp_variant *
st_create_vp_variant(struct st_context *st,
                     struct st_program *stvp,
                     const struct st_common_variant_key *key)
{
   struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = pipe->screen;
   struct pipe_shader_state state = {0};

   /* State reference for the clamped point size constant. */
   static const gl_state_index16 point_size_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
   struct gl_program_parameter_list *params = stvp->Base.Parameters;

   vpv->key = *key;
   vpv->num_inputs = ((struct st_vertex_program*)stvp)->num_inputs;

   state.stream_output = stvp->state.stream_output;

   /* --- NIR path --- */
   if (stvp->state.type == PIPE_SHADER_IR_NIR) {
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      /* Clone so per-variant lowering doesn't mutate the shared shader. */
      state.ir.nir = nir_shader_clone(NULL, stvp->Base.nir);
      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }
      if (key->passthrough_edgeflags) {
         NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
         /* The pass adds an edgeflag input. */
         vpv->num_inputs++;
         finalize = true;
      }

      if (key->lower_point_size) {
         _mesa_add_state_reference(params, point_size_state);
         NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
                    point_size_state);
         finalize = true;
      }

      if (key->lower_ucp) {
         bool can_compact = screen->get_param(screen,
                                              PIPE_CAP_NIR_COMPACT_ARRAYS);

         /* With a user vertex shader, clip planes are in eye space
          * (STATE_CLIPPLANE); otherwise use the internal clip state.
          */
         bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
         gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
         for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
            if (use_eye) {
               clipplane_state[i][0] = STATE_CLIPPLANE;
               clipplane_state[i][1] = i;
            } else {
               clipplane_state[i][0] = STATE_INTERNAL;
               clipplane_state[i][1] = STATE_CLIP_INTERNAL;
               clipplane_state[i][2] = i;
            }
            _mesa_add_state_reference(params, clipplane_state[i]);
         }

         NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
                    true, can_compact, clipplane_state);
         NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
                    nir_shader_get_entrypoint(state.ir.nir), true, false);
         NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
                         true);

         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));
      }

      if (ST_DEBUG & DEBUG_PRINT_IR)
         nir_print_shader(state.ir.nir, stderr);

      /* Draw-module variants go to the software rasterizer; everything
       * else goes to the driver.
       */
      if (key->is_draw_shader)
         vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
      else
         vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);

      return vpv;
   }

   /* --- TGSI path --- */
   state.type = PIPE_SHADER_IR_TGSI;
   state.tokens = tgsi_dup_tokens(stvp->state.tokens);

   /* Emulate features. */
   if (key->clamp_color || key->passthrough_edgeflags) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         /* Replace the duplicated tokens with the emulated ones. */
         tgsi_free_tokens(state.tokens);
         state.tokens = tokens;

         if (key->passthrough_edgeflags)
            vpv->num_inputs++;
      } else
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const =
            _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
                                         key->clip_negative_one_to_one);
      /* The lowering may return the input unchanged; only free on change. */
      if (tokens != state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   if (key->is_draw_shader)
      vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
   else
      vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);

   return vpv;
}
702
703
704 /**
705 * Find/create a vertex program variant.
706 */
707 struct st_vp_variant *
708 st_get_vp_variant(struct st_context *st,
709 struct st_program *stp,
710 const struct st_common_variant_key *key)
711 {
712 struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
713 struct st_vp_variant *vpv;
714
715 /* Search for existing variant */
716 for (vpv = st_vp_variant(stp->variants); vpv;
717 vpv = st_vp_variant(vpv->base.next)) {
718 if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
719 break;
720 }
721 }
722
723 if (!vpv) {
724 /* create now */
725 vpv = st_create_vp_variant(st, stp, key);
726 if (vpv) {
727 vpv->base.st = key->st;
728
729 for (unsigned index = 0; index < vpv->num_inputs; ++index) {
730 unsigned attr = stvp->index_to_input[index];
731 if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
732 continue;
733 vpv->vert_attrib_mask |= 1u << attr;
734 }
735
736 /* insert into list */
737 vpv->base.next = stp->variants;
738 stp->variants = &vpv->base;
739 }
740 }
741
742 return vpv;
743 }
744
745
746 /**
747 * Translate a Mesa fragment shader into a TGSI shader.
748 */
749 bool
750 st_translate_fragment_program(struct st_context *st,
751 struct st_program *stfp)
752 {
753 /* Non-GLSL programs: */
754 if (!stfp->glsl_to_tgsi) {
755 _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
756 if (st->ctx->Const.GLSLFragCoordIsSysVal)
757 _mesa_program_fragment_position_to_sysval(&stfp->Base);
758
759 /* This determines which states will be updated when the assembly
760 * shader is bound.
761 *
762 * fragment.position and glDrawPixels always use constants.
763 */
764 stfp->affected_states = ST_NEW_FS_STATE |
765 ST_NEW_SAMPLE_SHADING |
766 ST_NEW_FS_CONSTANTS;
767
768 if (stfp->ati_fs) {
769 /* Just set them for ATI_fs unconditionally. */
770 stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
771 ST_NEW_FS_SAMPLERS;
772 } else {
773 /* ARB_fp */
774 if (stfp->Base.SamplersUsed)
775 stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
776 ST_NEW_FS_SAMPLERS;
777 }
778
779 /* Translate to NIR. */
780 if (!stfp->ati_fs &&
781 st->pipe->screen->get_shader_param(st->pipe->screen,
782 PIPE_SHADER_FRAGMENT,
783 PIPE_SHADER_CAP_PREFERRED_IR)) {
784 nir_shader *nir =
785 st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
786
787 if (stfp->Base.nir)
788 ralloc_free(stfp->Base.nir);
789 stfp->state.type = PIPE_SHADER_IR_NIR;
790 stfp->Base.nir = nir;
791 return true;
792 }
793 }
794
795 ubyte outputMapping[2 * FRAG_RESULT_MAX];
796 ubyte inputMapping[VARYING_SLOT_MAX];
797 ubyte inputSlotToAttr[VARYING_SLOT_MAX];
798 ubyte interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */
799 GLuint attr;
800 GLbitfield64 inputsRead;
801 struct ureg_program *ureg;
802
803 GLboolean write_all = GL_FALSE;
804
805 ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
806 ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
807 uint fs_num_inputs = 0;
808
809 ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
810 ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
811 uint fs_num_outputs = 0;
812
813 memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
814
815 /*
816 * Convert Mesa program inputs to TGSI input register semantics.
817 */
818 inputsRead = stfp->Base.info.inputs_read;
819 for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
820 if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
821 const GLuint slot = fs_num_inputs++;
822
823 inputMapping[attr] = slot;
824 inputSlotToAttr[slot] = attr;
825
826 switch (attr) {
827 case VARYING_SLOT_POS:
828 input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
829 input_semantic_index[slot] = 0;
830 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
831 break;
832 case VARYING_SLOT_COL0:
833 input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
834 input_semantic_index[slot] = 0;
835 interpMode[slot] = stfp->glsl_to_tgsi ?
836 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
837 break;
838 case VARYING_SLOT_COL1:
839 input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
840 input_semantic_index[slot] = 1;
841 interpMode[slot] = stfp->glsl_to_tgsi ?
842 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
843 break;
844 case VARYING_SLOT_FOGC:
845 input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
846 input_semantic_index[slot] = 0;
847 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
848 break;
849 case VARYING_SLOT_FACE:
850 input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
851 input_semantic_index[slot] = 0;
852 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
853 break;
854 case VARYING_SLOT_PRIMITIVE_ID:
855 input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
856 input_semantic_index[slot] = 0;
857 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
858 break;
859 case VARYING_SLOT_LAYER:
860 input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
861 input_semantic_index[slot] = 0;
862 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
863 break;
864 case VARYING_SLOT_VIEWPORT:
865 input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
866 input_semantic_index[slot] = 0;
867 interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
868 break;
869 case VARYING_SLOT_CLIP_DIST0:
870 input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
871 input_semantic_index[slot] = 0;
872 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
873 break;
874 case VARYING_SLOT_CLIP_DIST1:
875 input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
876 input_semantic_index[slot] = 1;
877 interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
878 break;
879 case VARYING_SLOT_CULL_DIST0:
880 case VARYING_SLOT_CULL_DIST1:
881 /* these should have been lowered by GLSL */
882 assert(0);
883 break;
884 /* In most cases, there is nothing special about these
885 * inputs, so adopt a convention to use the generic
886 * semantic name and the mesa VARYING_SLOT_ number as the
887 * index.
888 *
889 * All that is required is that the vertex shader labels
890 * its own outputs similarly, and that the vertex shader
891 * generates at least every output required by the
892 * fragment shader plus fixed-function hardware (such as
893 * BFC).
894 *
895 * However, some drivers may need us to identify the PNTC and TEXi
896 * varyings if, for example, their capability to replace them with
897 * sprite coordinates is limited.
898 */
899 case VARYING_SLOT_PNTC:
900 if (st->needs_texcoord_semantic) {
901 input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
902 input_semantic_index[slot] = 0;
903 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
904 break;
905 }
906 /* fall through */
907 case VARYING_SLOT_TEX0:
908 case VARYING_SLOT_TEX1:
909 case VARYING_SLOT_TEX2:
910 case VARYING_SLOT_TEX3:
911 case VARYING_SLOT_TEX4:
912 case VARYING_SLOT_TEX5:
913 case VARYING_SLOT_TEX6:
914 case VARYING_SLOT_TEX7:
915 if (st->needs_texcoord_semantic) {
916 input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
917 input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
918 interpMode[slot] = stfp->glsl_to_tgsi ?
919 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
920 break;
921 }
922 /* fall through */
923 case VARYING_SLOT_VAR0:
924 default:
925 /* Semantic indices should be zero-based because drivers may choose
926 * to assign a fixed slot determined by that index.
927 * This is useful because ARB_separate_shader_objects uses location
928 * qualifiers for linkage, and if the semantic index corresponds to
929 * these locations, linkage passes in the driver become unecessary.
930 *
931 * If needs_texcoord_semantic is true, no semantic indices will be
932 * consumed for the TEXi varyings, and we can base the locations of
933 * the user varyings on VAR0. Otherwise, we use TEX0 as base index.
934 */
935 assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
936 (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
937 input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
938 input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
939 if (attr == VARYING_SLOT_PNTC)
940 interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
941 else {
942 interpMode[slot] = stfp->glsl_to_tgsi ?
943 TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
944 }
945 break;
946 }
947 }
948 else {
949 inputMapping[attr] = -1;
950 }
951 }
952
953 /*
954 * Semantics and mapping for outputs
955 */
956 GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
957
958 /* if z is written, emit that first */
959 if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
960 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
961 fs_output_semantic_index[fs_num_outputs] = 0;
962 outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
963 fs_num_outputs++;
964 outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
965 }
966
967 if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
968 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
969 fs_output_semantic_index[fs_num_outputs] = 0;
970 outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
971 fs_num_outputs++;
972 outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
973 }
974
975 if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
976 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
977 fs_output_semantic_index[fs_num_outputs] = 0;
978 outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
979 fs_num_outputs++;
980 outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
981 }
982
983 /* handle remaining outputs (color) */
984 for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
985 const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
986 stfp->Base.SecondaryOutputsWritten;
987 const unsigned loc = attr % FRAG_RESULT_MAX;
988
989 if (written & BITFIELD64_BIT(loc)) {
990 switch (loc) {
991 case FRAG_RESULT_DEPTH:
992 case FRAG_RESULT_STENCIL:
993 case FRAG_RESULT_SAMPLE_MASK:
994 /* handled above */
995 assert(0);
996 break;
997 case FRAG_RESULT_COLOR:
998 write_all = GL_TRUE; /* fallthrough */
999 default: {
1000 int index;
1001 assert(loc == FRAG_RESULT_COLOR ||
1002 (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1003
1004 index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1005
1006 if (attr >= FRAG_RESULT_MAX) {
1007 /* Secondary color for dual source blending. */
1008 assert(index == 0);
1009 index++;
1010 }
1011
1012 fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1013 fs_output_semantic_index[fs_num_outputs] = index;
1014 outputMapping[attr] = fs_num_outputs;
1015 break;
1016 }
1017 }
1018
1019 fs_num_outputs++;
1020 }
1021 }
1022
1023 ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1024 if (ureg == NULL)
1025 return false;
1026
1027 if (ST_DEBUG & DEBUG_MESA) {
1028 _mesa_print_program(&stfp->Base);
1029 _mesa_print_program_parameters(st->ctx, &stfp->Base);
1030 debug_printf("\n");
1031 }
1032 if (write_all == GL_TRUE)
1033 ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1034
1035 if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1036 switch (stfp->Base.info.fs.depth_layout) {
1037 case FRAG_DEPTH_LAYOUT_ANY:
1038 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1039 TGSI_FS_DEPTH_LAYOUT_ANY);
1040 break;
1041 case FRAG_DEPTH_LAYOUT_GREATER:
1042 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1043 TGSI_FS_DEPTH_LAYOUT_GREATER);
1044 break;
1045 case FRAG_DEPTH_LAYOUT_LESS:
1046 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1047 TGSI_FS_DEPTH_LAYOUT_LESS);
1048 break;
1049 case FRAG_DEPTH_LAYOUT_UNCHANGED:
1050 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1051 TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1052 break;
1053 default:
1054 assert(0);
1055 }
1056 }
1057
1058 if (stfp->glsl_to_tgsi) {
1059 st_translate_program(st->ctx,
1060 PIPE_SHADER_FRAGMENT,
1061 ureg,
1062 stfp->glsl_to_tgsi,
1063 &stfp->Base,
1064 /* inputs */
1065 fs_num_inputs,
1066 inputMapping,
1067 inputSlotToAttr,
1068 input_semantic_name,
1069 input_semantic_index,
1070 interpMode,
1071 /* outputs */
1072 fs_num_outputs,
1073 outputMapping,
1074 fs_output_semantic_name,
1075 fs_output_semantic_index);
1076
1077 free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1078 } else if (stfp->ati_fs)
1079 st_translate_atifs_program(ureg,
1080 stfp->ati_fs,
1081 &stfp->Base,
1082 /* inputs */
1083 fs_num_inputs,
1084 inputMapping,
1085 input_semantic_name,
1086 input_semantic_index,
1087 interpMode,
1088 /* outputs */
1089 fs_num_outputs,
1090 outputMapping,
1091 fs_output_semantic_name,
1092 fs_output_semantic_index);
1093 else
1094 st_translate_mesa_program(st->ctx,
1095 PIPE_SHADER_FRAGMENT,
1096 ureg,
1097 &stfp->Base,
1098 /* inputs */
1099 fs_num_inputs,
1100 inputMapping,
1101 input_semantic_name,
1102 input_semantic_index,
1103 interpMode,
1104 /* outputs */
1105 fs_num_outputs,
1106 outputMapping,
1107 fs_output_semantic_name,
1108 fs_output_semantic_index);
1109
1110 stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1111 ureg_destroy(ureg);
1112
1113 if (stfp->glsl_to_tgsi) {
1114 stfp->glsl_to_tgsi = NULL;
1115 st_store_ir_in_disk_cache(st, &stfp->Base, false);
1116 }
1117
1118 return stfp->state.tokens != NULL;
1119 }
1120
/**
 * Create a fragment-program variant for the given key.
 *
 * Applies every key-dependent lowering/fixup the key requests (color
 * clamping, flat shading, alpha test, two-sided color, per-sample
 * interpolation, glBitmap, glDrawPixels, external/YUV samplers, depth
 * clamp) and then creates the driver shader.  The NIR path clones the
 * master shader and runs NIR passes; the TGSI path rewrites the stored
 * token stream, freeing each intermediate stream it replaces.
 *
 * \param st    context the variant is created for
 * \param stfp  the translated fragment program (NIR or TGSI)
 * \param key   variant key describing the required lowerings
 * \return the new variant, or NULL if the CALLOC failed.  Ownership of
 *         the variant passes to the caller (st_get_fp_variant's list).
 */
static struct st_fp_variant *
st_create_fp_variant(struct st_context *st,
                     struct st_program *stfp,
                     const struct st_fp_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = stfp->Base.Parameters;
   /* State-var tokens used by the lowering passes below; the ones a
    * pass needs are registered in 'params' so the values reach the
    * shader as constants.
    */
   static const gl_state_index16 texcoord_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
   static const gl_state_index16 scale_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_PT_SCALE };
   static const gl_state_index16 bias_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_PT_BIAS };
   static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_ALPHA_REF };

   if (!variant)
      return NULL;

   if (stfp->state.type == PIPE_SHADER_IR_NIR) {
      /* Set whenever a pass changed the shader, so we know another
       * finalize round is required before handing it to the driver.
       */
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      /* Clone so per-variant lowering never touches the master copy. */
      state.ir.nir = nir_shader_clone(NULL, stfp->Base.nir);

      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }

      if (key->lower_flatshade) {
         NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
         finalize = true;
      }

      if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
         /* Alpha-test lowering reads the reference value from a
          * state constant rather than baking it into the shader.
          */
         _mesa_add_state_reference(params, alpha_ref_state);
         NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
                    false, alpha_ref_state);
         finalize = true;
      }

      if (key->lower_two_sided_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
         finalize = true;
      }

      if (key->persample_shading) {
         /* Force every input to be sample-interpolated. */
         nir_shader *shader = state.ir.nir;
         nir_foreach_variable(var, &shader->inputs)
            var->data.sample = true;
         finalize = true;
      }

      /* bitmap and drawpixels variants are mutually exclusive */
      assert(!(key->bitmap && key->drawpixels));

      /* glBitmap */
      if (key->bitmap) {
         nir_lower_bitmap_options options = {0};

         /* Use the first sampler slot the program doesn't occupy. */
         variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
         options.sampler = variant->bitmap_sampler;
         options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;

         NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
         finalize = true;
      }

      /* glDrawPixels (color only) */
      if (key->drawpixels) {
         nir_lower_drawpixels_options options = {{0}};
         unsigned samplers_used = stfp->Base.SamplersUsed;

         /* Find the first unused slot. */
         variant->drawpix_sampler = ffs(~samplers_used) - 1;
         options.drawpix_sampler = variant->drawpix_sampler;
         samplers_used |= (1 << variant->drawpix_sampler);

         options.pixel_maps = key->pixelMaps;
         if (key->pixelMaps) {
            /* pixel-map lookup needs its own, second sampler */
            variant->pixelmap_sampler = ffs(~samplers_used) - 1;
            options.pixelmap_sampler = variant->pixelmap_sampler;
         }

         options.scale_and_bias = key->scaleAndBias;
         if (key->scaleAndBias) {
            _mesa_add_state_reference(params, scale_state);
            memcpy(options.scale_state_tokens, scale_state,
                   sizeof(options.scale_state_tokens));
            _mesa_add_state_reference(params, bias_state);
            memcpy(options.bias_state_tokens, bias_state,
                   sizeof(options.bias_state_tokens));
         }

         _mesa_add_state_reference(params, texcoord_state);
         memcpy(options.texcoord_state_tokens, texcoord_state,
                sizeof(options.texcoord_state_tokens));

         NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
         finalize = true;
      }

      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv)) {
         /* Lower samplerExternalOES YUV formats to per-plane fetches. */
         nir_lower_tex_options options = {0};
         options.lower_y_uv_external = key->external.lower_nv12;
         options.lower_y_u_v_external = key->external.lower_iyuv;
         options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
         options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
         options.lower_ayuv_external = key->external.lower_ayuv;
         options.lower_xyuv_external = key->external.lower_xyuv;
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
                         false);
      }

      /* This pass needs to happen *after* nir_lower_sampler */
      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
         NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
                    ~stfp->Base.SamplersUsed,
                    key->external.lower_nv12 || key->external.lower_xy_uxvx ||
                       key->external.lower_yx_xuxv,
                    key->external.lower_iyuv);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));

         struct pipe_screen *screen = pipe->screen;
         if (screen->finalize_nir)
            screen->finalize_nir(screen, state.ir.nir, false);
      }

      if (ST_DEBUG & DEBUG_PRINT_IR)
         nir_print_shader(state.ir.nir, stderr);

      variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
      variant->key = *key;

      return variant;
   }

   /* --- TGSI path: rewrite the token stream per key --- */
   state.tokens = stfp->state.tokens;

   assert(!(key->bitmap && key->drawpixels));

   /* Fix texture targets and add fog for ATI_fs */
   if (stfp->ati_fs) {
      const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);

      if (tokens)
         state.tokens = tokens;
      else
         fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
   }

   /* Emulate features. */
   if (key->clamp_color || key->persample_shading) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         /* Free the previous intermediate stream, but never the
          * program's own master copy.
          */
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
   }

   /* glBitmap */
   if (key->bitmap) {
      const struct tgsi_token *tokens;

      variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      tokens = st_get_bitmap_shader(state.tokens,
                                    st->internal_target,
                                    variant->bitmap_sampler,
                                    st->needs_texcoord_semantic,
                                    st->bitmap.tex_format ==
                                    PIPE_FORMAT_R8_UNORM);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
   }

   /* glDrawPixels (color only) */
   if (key->drawpixels) {
      const struct tgsi_token *tokens;
      unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;

      /* Find the first unused slot. */
      variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      if (key->pixelMaps) {
         unsigned samplers_used = stfp->Base.SamplersUsed |
                                  (1 << variant->drawpix_sampler);

         variant->pixelmap_sampler = ffs(~samplers_used) - 1;
      }

      if (key->scaleAndBias) {
         scale_const = _mesa_add_state_reference(params, scale_state);
         bias_const = _mesa_add_state_reference(params, bias_state);
      }

      texcoord_const = _mesa_add_state_reference(params, texcoord_state);

      tokens = st_get_drawpix_shader(state.tokens,
                                     st->needs_texcoord_semantic,
                                     key->scaleAndBias, scale_const,
                                     bias_const, key->pixelMaps,
                                     variant->drawpix_sampler,
                                     variant->pixelmap_sampler,
                                     texcoord_const, st->internal_target);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
   }

   if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
      const struct tgsi_token *tokens;

      /* samplers inserted would conflict, but this should be unpossible: */
      assert(!(key->bitmap || key->drawpixels));

      tokens = st_tgsi_lower_yuv(state.tokens,
                                 ~stfp->Base.SamplersUsed,
                                 key->external.lower_nv12 ||
                                    key->external.lower_xy_uxvx ||
                                    key->external.lower_yx_xuxv,
                                 key->external.lower_iyuv);
      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);

      /* NOTE(review): unlike the other fixups above, the result here is
       * not NULL-checked before replacing state.tokens — confirm
       * st_tgsi_lower_depth_clamp_fs cannot fail.
       */
      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
      if (state.tokens != stfp->state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   /* fill in variant */
   variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
   variant->key = *key;

   /* The driver made its own copy; drop our rewritten stream. */
   if (state.tokens != stfp->state.tokens)
      tgsi_free_tokens(state.tokens);
   return variant;
}
1407
1408 /**
1409 * Translate fragment program if needed.
1410 */
1411 struct st_fp_variant *
1412 st_get_fp_variant(struct st_context *st,
1413 struct st_program *stfp,
1414 const struct st_fp_variant_key *key)
1415 {
1416 struct st_fp_variant *fpv;
1417
1418 /* Search for existing variant */
1419 for (fpv = st_fp_variant(stfp->variants); fpv;
1420 fpv = st_fp_variant(fpv->base.next)) {
1421 if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1422 break;
1423 }
1424 }
1425
1426 if (!fpv) {
1427 /* create new */
1428 fpv = st_create_fp_variant(st, stfp, key);
1429 if (fpv) {
1430 fpv->base.st = key->st;
1431
1432 if (key->bitmap || key->drawpixels) {
1433 /* Regular variants should always come before the
1434 * bitmap & drawpixels variants, (unless there
1435 * are no regular variants) so that
1436 * st_update_fp can take a fast path when
1437 * shader_has_one_variant is set.
1438 */
1439 if (!stfp->variants) {
1440 stfp->variants = &fpv->base;
1441 } else {
1442 /* insert into list after the first one */
1443 fpv->base.next = stfp->variants->next;
1444 stfp->variants->next = &fpv->base;
1445 }
1446 } else {
1447 /* insert into list */
1448 fpv->base.next = stfp->variants;
1449 stfp->variants = &fpv->base;
1450 }
1451 }
1452 }
1453
1454 return fpv;
1455 }
1456
1457 /**
1458 * Translate a program. This is common code for geometry and tessellation
1459 * shaders.
1460 */
/**
 * Translate a program. This is common code for geometry and tessellation
 * shaders.
 *
 * Converts the Mesa IR stored in stp->glsl_to_tgsi into a TGSI token
 * stream: sets stage-specific TGSI properties, builds the input/output
 * slot mappings and semantics (including per-patch varyings), runs
 * st_translate_program, and stores the resulting tokens in stp->state.
 * The glsl_to_tgsi visitor is freed and cleared on completion.
 *
 * \return true on success (always, once ureg creation succeeded).
 */
bool
st_translate_common_program(struct st_context *st,
                            struct st_program *stp)
{
   struct gl_program *prog = &stp->Base;
   enum pipe_shader_type stage =
      pipe_shader_type_from_mesa(stp->Base.info.stage);
   struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);

   if (ureg == NULL)
      return false;

   /* Emit the TGSI properties each stage requires. */
   switch (stage) {
   case PIPE_SHADER_TESS_CTRL:
      ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
                    stp->Base.info.tess.tcs_vertices_out);
      break;

   case PIPE_SHADER_TESS_EVAL:
      /* TGSI has no isolines primitive; map it to lines. */
      if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
         ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
      else
         ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
                       stp->Base.info.tess.primitive_mode);

      /* The (x + 1) % 3 remap below relies on these equalities. */
      STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
      STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
                    PIPE_TESS_SPACING_FRACTIONAL_ODD);
      STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
                    PIPE_TESS_SPACING_FRACTIONAL_EVEN);

      ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
                    (stp->Base.info.tess.spacing + 1) % 3);

      ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
                    !stp->Base.info.tess.ccw);
      ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
                    stp->Base.info.tess.point_mode);
      break;

   case PIPE_SHADER_GEOMETRY:
      ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
                    stp->Base.info.gs.input_primitive);
      ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
                    stp->Base.info.gs.output_primitive);
      ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
                    stp->Base.info.gs.vertices_out);
      ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
                    stp->Base.info.gs.invocations);
      break;

   default:
      break;
   }

   ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
   ubyte inputMapping[VARYING_SLOT_TESS_MAX];
   ubyte outputMapping[VARYING_SLOT_TESS_MAX];
   GLuint attr;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint num_inputs = 0;

   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint num_outputs = 0;

   GLint i;

   memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
   memset(inputMapping, 0, sizeof(inputMapping));
   memset(outputMapping, 0, sizeof(outputMapping));
   memset(&stp->state, 0, sizeof(stp->state));

   if (prog->info.clip_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
                    prog->info.clip_distance_array_size);
   if (prog->info.cull_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
                    prog->info.cull_distance_array_size);

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
         continue;

      unsigned slot = num_inputs++;

      inputMapping[attr] = slot;
      inputSlotToAttr[slot] = attr;

      unsigned semantic_name, semantic_index;
      tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                   &semantic_name, &semantic_index);
      input_semantic_name[slot] = semantic_name;
      input_semantic_index[slot] = semantic_index;
   }

   /* Also add patch inputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_inputs_read & (1u << attr)) {
         GLuint slot = num_inputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         inputMapping[patch_attr] = slot;
         inputSlotToAttr[slot] = patch_attr;
         input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         input_semantic_index[slot] = attr;
      }
   }

   /* initialize output semantics to defaults */
   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
      output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
      output_semantic_index[i] = 0;
   }

   /*
    * Determine number of outputs, the (default) output register
    * mapping and the semantic information for each output.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
         GLuint slot = num_outputs++;

         outputMapping[attr] = slot;

         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }

   /* Also add patch outputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_outputs_written & (1u << attr)) {
         GLuint slot = num_outputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         outputMapping[patch_attr] = slot;
         output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         output_semantic_index[slot] = attr;
      }
   }

   st_translate_program(st->ctx,
                        stage,
                        ureg,
                        stp->glsl_to_tgsi,
                        prog,
                        /* inputs */
                        num_inputs,
                        inputMapping,
                        inputSlotToAttr,
                        input_semantic_name,
                        input_semantic_index,
                        NULL,
                        /* outputs */
                        num_outputs,
                        outputMapping,
                        output_semantic_name,
                        output_semantic_index);

   stp->state.tokens = ureg_get_tokens(ureg, NULL);

   ureg_destroy(ureg);

   st_translate_stream_output_info(prog);

   st_store_ir_in_disk_cache(st, prog, false);

   if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
      _mesa_print_program(prog);

   /* The visitor is no longer needed once tokens exist. */
   free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   stp->glsl_to_tgsi = NULL;
   return true;
}
1644
1645
1646 /**
1647 * Get/create a basic program variant.
1648 */
/**
 * Get/create a basic program variant.
 *
 * Used for tessellation, geometry and compute programs: searches the
 * program's variant list for one matching the key, otherwise creates a
 * driver shader from either the NIR (cloned and optionally lowered) or
 * the TGSI tokens, and links the new variant into the list.
 *
 * \return the variant, or NULL on allocation failure / unhandled stage.
 */
struct st_variant *
st_get_common_variant(struct st_context *st,
                      struct st_program *prog,
                      const struct st_common_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_variant *v;
   struct pipe_shader_state state = {0};

   /* Search for existing variant */
   for (v = prog->variants; v; v = v->next) {
      if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
         break;
   }

   if (!v) {
      /* create new */
      v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
      if (v) {
         if (prog->state.type == PIPE_SHADER_IR_NIR) {
            /* Tracks whether any lowering dirtied the cloned shader. */
            bool finalize = false;

            state.type = PIPE_SHADER_IR_NIR;
            /* Clone so per-variant lowering doesn't touch the master. */
            state.ir.nir = nir_shader_clone(NULL, prog->Base.nir);

            if (key->clamp_color) {
               NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
               finalize = true;
            }

            state.stream_output = prog->state.stream_output;

            if (finalize || !st->allow_st_finalize_nir_twice) {
               st_finalize_nir(st, &prog->Base, prog->shader_program,
                               state.ir.nir, true);
            }

            if (ST_DEBUG & DEBUG_PRINT_IR)
               nir_print_shader(state.ir.nir, stderr);
         } else {
            /* TGSI path: depth-clamp lowering rewrites the program's
             * own token stream in place (unlike the per-variant
             * rewrites in st_create_fp_variant).
             */
            if (key->lower_depth_clamp) {
               struct gl_program_parameter_list *params = prog->Base.Parameters;

               unsigned depth_range_const =
                     _mesa_add_state_reference(params, depth_range_state);

               const struct tgsi_token *tokens;
               tokens =
                     st_tgsi_lower_depth_clamp(prog->state.tokens,
                                               depth_range_const,
                                               key->clip_negative_one_to_one);

               if (tokens != prog->state.tokens)
                  tgsi_free_tokens(prog->state.tokens);

               prog->state.tokens = tokens;
            }
            state = prog->state;

            if (ST_DEBUG & DEBUG_PRINT_IR)
               tgsi_dump(state.tokens, 0);
         }
         /* fill in new variant */
         switch (prog->Base.info.stage) {
         case MESA_SHADER_TESS_CTRL:
            v->driver_shader = pipe->create_tcs_state(pipe, &state);
            break;
         case MESA_SHADER_TESS_EVAL:
            v->driver_shader = pipe->create_tes_state(pipe, &state);
            break;
         case MESA_SHADER_GEOMETRY:
            v->driver_shader = pipe->create_gs_state(pipe, &state);
            break;
         case MESA_SHADER_COMPUTE: {
            /* Compute uses pipe_compute_state rather than
             * pipe_shader_state.
             */
            struct pipe_compute_state cs = {0};
            cs.ir_type = state.type;
            cs.req_local_mem = prog->Base.info.cs.shared_size;

            if (state.type == PIPE_SHADER_IR_NIR)
               cs.prog = state.ir.nir;
            else
               cs.prog = state.tokens;

            v->driver_shader = pipe->create_compute_state(pipe, &cs);
            break;
         }
         default:
            /* NOTE(review): if this (asserted-unreachable) path were
             * hit with a NIR program, the clone made above would leak.
             */
            assert(!"unhandled shader type");
            free(v);
            return NULL;
         }

         st_common_variant(v)->key = *key;
         v->st = key->st;

         /* insert into list */
         v->next = prog->variants;
         prog->variants = v;
      }
   }

   return v;
}
1752
1753
1754 /**
1755 * Vert/Geom/Frag programs have per-context variants. Free all the
1756 * variants attached to the given program which match the given context.
1757 */
1758 static void
1759 destroy_program_variants(struct st_context *st, struct gl_program *target)
1760 {
1761 if (!target || target == &_mesa_DummyProgram)
1762 return;
1763
1764 struct st_program *p = st_program(target);
1765 struct st_variant *v, **prevPtr = &p->variants;
1766
1767 for (v = p->variants; v; ) {
1768 struct st_variant *next = v->next;
1769 if (v->st == st) {
1770 /* unlink from list */
1771 *prevPtr = next;
1772 /* destroy this variant */
1773 delete_variant(st, v, target->Target);
1774 }
1775 else {
1776 prevPtr = &v->next;
1777 }
1778 v = next;
1779 }
1780 }
1781
1782
1783 /**
1784 * Callback for _mesa_HashWalk. Free all the shader's program variants
1785 * which match the given context.
1786 */
1787 static void
1788 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1789 {
1790 struct st_context *st = (struct st_context *) userData;
1791 struct gl_shader *shader = (struct gl_shader *) data;
1792
1793 switch (shader->Type) {
1794 case GL_SHADER_PROGRAM_MESA:
1795 {
1796 struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1797 GLuint i;
1798
1799 for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1800 if (shProg->_LinkedShaders[i])
1801 destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1802 }
1803 }
1804 break;
1805 case GL_VERTEX_SHADER:
1806 case GL_FRAGMENT_SHADER:
1807 case GL_GEOMETRY_SHADER:
1808 case GL_TESS_CONTROL_SHADER:
1809 case GL_TESS_EVALUATION_SHADER:
1810 case GL_COMPUTE_SHADER:
1811 break;
1812 default:
1813 assert(0);
1814 }
1815 }
1816
1817
1818 /**
1819 * Callback for _mesa_HashWalk. Free all the program variants which match
1820 * the given context.
1821 */
1822 static void
1823 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1824 {
1825 struct st_context *st = (struct st_context *) userData;
1826 struct gl_program *program = (struct gl_program *) data;
1827 destroy_program_variants(st, program);
1828 }
1829
1830
1831 /**
1832 * Walk over all shaders and programs to delete any variants which
1833 * belong to the given context.
1834 * This is called during context tear-down.
1835 */
1836 void
1837 st_destroy_program_variants(struct st_context *st)
1838 {
1839 /* If shaders can be shared with other contexts, the last context will
1840 * call DeleteProgram on all shaders, releasing everything.
1841 */
1842 if (st->has_shareable_shaders)
1843 return;
1844
1845 /* ARB vert/frag program */
1846 _mesa_HashWalk(st->ctx->Shared->Programs,
1847 destroy_program_variants_cb, st);
1848
1849 /* GLSL vert/frag/geom shaders */
1850 _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1851 destroy_shader_program_variants_cb, st);
1852 }
1853
1854
1855 /**
1856 * Compile one shader variant.
1857 */
1858 static void
1859 st_precompile_shader_variant(struct st_context *st,
1860 struct gl_program *prog)
1861 {
1862 switch (prog->Target) {
1863 case GL_VERTEX_PROGRAM_ARB: {
1864 struct st_program *p = (struct st_program *)prog;
1865 struct st_common_variant_key key;
1866
1867 memset(&key, 0, sizeof(key));
1868
1869 key.st = st->has_shareable_shaders ? NULL : st;
1870 st_get_vp_variant(st, p, &key);
1871 break;
1872 }
1873
1874 case GL_FRAGMENT_PROGRAM_ARB: {
1875 struct st_program *p = (struct st_program *)prog;
1876 struct st_fp_variant_key key;
1877
1878 memset(&key, 0, sizeof(key));
1879
1880 key.st = st->has_shareable_shaders ? NULL : st;
1881 st_get_fp_variant(st, p, &key);
1882 break;
1883 }
1884
1885 case GL_TESS_CONTROL_PROGRAM_NV:
1886 case GL_TESS_EVALUATION_PROGRAM_NV:
1887 case GL_GEOMETRY_PROGRAM_NV:
1888 case GL_COMPUTE_PROGRAM_NV: {
1889 struct st_program *p = st_program(prog);
1890 struct st_common_variant_key key;
1891
1892 memset(&key, 0, sizeof(key));
1893
1894 key.st = st->has_shareable_shaders ? NULL : st;
1895 st_get_common_variant(st, p, &key);
1896 break;
1897 }
1898
1899 default:
1900 assert(0);
1901 }
1902 }
1903
1904 void
1905 st_finalize_program(struct st_context *st, struct gl_program *prog)
1906 {
1907 if (st->current_program[prog->info.stage] == prog) {
1908 if (prog->info.stage == MESA_SHADER_VERTEX)
1909 st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
1910 else
1911 st->dirty |= ((struct st_program *)prog)->affected_states;
1912 }
1913
1914 if (prog->nir)
1915 nir_sweep(prog->nir);
1916
1917 /* Create Gallium shaders now instead of on demand. */
1918 if (ST_DEBUG & DEBUG_PRECOMPILE ||
1919 st->shader_has_one_variant[prog->info.stage])
1920 st_precompile_shader_variant(st, prog);
1921 }