src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35 #include "main/imports.h"
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44
  45 #include "pipe/p_context.h"
  46 #include "pipe/p_defines.h"
  47 #include "pipe/p_shader_tokens.h"
  48 #include "draw/draw_context.h"
  49 #include "tgsi/tgsi_dump.h"
  50 #include "tgsi/tgsi_emulate.h"
  51 #include "tgsi/tgsi_parse.h"
  52 #include "tgsi/tgsi_ureg.h"
  53
  54 #include "st_debug.h"
  55 #include "st_cb_bitmap.h"
  56 #include "st_cb_drawpixels.h"
  57 #include "st_context.h"
  58 #include "st_tgsi_lower_depth_clamp.h"
  59 #include "st_tgsi_lower_yuv.h"
  60 #include "st_program.h"
  61 #include "st_mesa_to_tgsi.h"
  62 #include "st_atifs_to_tgsi.h"
  63 #include "st_nir.h"
  64 #include "st_shader_cache.h"
  65 #include "st_util.h"
  66 #include "cso_cache/cso_context.h"
  67
  68
  69
  70 static void
  71 set_affected_state_flags(uint64_t *states,
  72                          struct gl_program *prog,
  73                          uint64_t new_constants,
  74                          uint64_t new_sampler_views,
  75                          uint64_t new_samplers,
  76                          uint64_t new_images,
  77                          uint64_t new_ubos,
  78                          uint64_t new_ssbos,
  79                          uint64_t new_atomics)
  80 {
  81    if (prog->Parameters->NumParameters)
  82       *states |= new_constants;
  83
  84    if (prog->info.num_textures)
  85       *states |= new_sampler_views | new_samplers;
  86
  87    if (prog->info.num_images)
  88       *states |= new_images;
  89
  90    if (prog->info.num_ubos)
  91       *states |= new_ubos;
  92
  93    if (prog->info.num_ssbos)
  94       *states |= new_ssbos;
  95
  96    if (prog->info.num_abos)
  97       *states |= new_atomics;
  98 }
  99
 100 /**
 101  * This determines which states will be updated when the shader is bound.
 102  */
 103 void
 104 st_set_prog_affected_state_flags(struct gl_program *prog)
 105 {
 106    uint64_t *states;
 107
 108    switch (prog->info.stage) {
 109    case MESA_SHADER_VERTEX:
 110       states = &((struct st_program*)prog)->affected_states;
 111
 112       *states = ST_NEW_VS_STATE |
 113                 ST_NEW_RASTERIZER |
 114                 ST_NEW_VERTEX_ARRAYS;
 115
 116       set_affected_state_flags(states, prog,
 117                                ST_NEW_VS_CONSTANTS,
 118                                ST_NEW_VS_SAMPLER_VIEWS,
 119                                ST_NEW_VS_SAMPLERS,
 120                                ST_NEW_VS_IMAGES,
 121                                ST_NEW_VS_UBOS,
 122                                ST_NEW_VS_SSBOS,
 123                                ST_NEW_VS_ATOMICS);
 124       break;
 125
 126    case MESA_SHADER_TESS_CTRL:
 127       states = &(st_program(prog))->affected_states;
 128
 129       *states = ST_NEW_TCS_STATE;
 130
 131       set_affected_state_flags(states, prog,
 132                                ST_NEW_TCS_CONSTANTS,
 133                                ST_NEW_TCS_SAMPLER_VIEWS,
 134                                ST_NEW_TCS_SAMPLERS,
 135                                ST_NEW_TCS_IMAGES,
 136                                ST_NEW_TCS_UBOS,
 137                                ST_NEW_TCS_SSBOS,
 138                                ST_NEW_TCS_ATOMICS);
 139       break;
 140
 141    case MESA_SHADER_TESS_EVAL:
 142       states = &(st_program(prog))->affected_states;
 143
 144       *states = ST_NEW_TES_STATE |
 145                 ST_NEW_RASTERIZER;
 146
 147       set_affected_state_flags(states, prog,
 148                                ST_NEW_TES_CONSTANTS,
 149                                ST_NEW_TES_SAMPLER_VIEWS,
 150                                ST_NEW_TES_SAMPLERS,
 151                                ST_NEW_TES_IMAGES,
 152                                ST_NEW_TES_UBOS,
 153                                ST_NEW_TES_SSBOS,
 154                                ST_NEW_TES_ATOMICS);
 155       break;
 156
 157    case MESA_SHADER_GEOMETRY:
 158       states = &(st_program(prog))->affected_states;
 159
 160       *states = ST_NEW_GS_STATE |
 161                 ST_NEW_RASTERIZER;
 162
 163       set_affected_state_flags(states, prog,
 164                                ST_NEW_GS_CONSTANTS,
 165                                ST_NEW_GS_SAMPLER_VIEWS,
 166                                ST_NEW_GS_SAMPLERS,
 167                                ST_NEW_GS_IMAGES,
 168                                ST_NEW_GS_UBOS,
 169                                ST_NEW_GS_SSBOS,
 170                                ST_NEW_GS_ATOMICS);
 171       break;
 172
 173    case MESA_SHADER_FRAGMENT:
 174       states = &((struct st_program*)prog)->affected_states;
 175
 176       /* gl_FragCoord and glDrawPixels always use constants. */
 177       *states = ST_NEW_FS_STATE |
 178                 ST_NEW_SAMPLE_SHADING |
 179                 ST_NEW_FS_CONSTANTS;
 180
 181       set_affected_state_flags(states, prog,
 182                                ST_NEW_FS_CONSTANTS,
 183                                ST_NEW_FS_SAMPLER_VIEWS,
 184                                ST_NEW_FS_SAMPLERS,
 185                                ST_NEW_FS_IMAGES,
 186                                ST_NEW_FS_UBOS,
 187                                ST_NEW_FS_SSBOS,
 188                                ST_NEW_FS_ATOMICS);
 189       break;
 190
 191    case MESA_SHADER_COMPUTE:
 192       states = &((struct st_program*)prog)->affected_states;
 193
 194       *states = ST_NEW_CS_STATE;
 195
 196       set_affected_state_flags(states, prog,
 197                                ST_NEW_CS_CONSTANTS,
 198                                ST_NEW_CS_SAMPLER_VIEWS,
 199                                ST_NEW_CS_SAMPLERS,
 200                                ST_NEW_CS_IMAGES,
 201                                ST_NEW_CS_UBOS,
 202                                ST_NEW_CS_SSBOS,
 203                                ST_NEW_CS_ATOMICS);
 204       break;
 205
 206    default:
 207       unreachable("unhandled shader stage");
 208    }
 209 }
 210
 211
 212 /**
 213  * Delete a shader variant.  Note the caller must unlink the variant from
 214  * the linked list.
 215  */
 216 static void
 217 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
 218 {
 219    if (v->driver_shader) {
 220       if (st->has_shareable_shaders || v->st == st) {
 221          /* The shader's context matches the calling context, or we
 222           * don't care.
 223           */
 224          switch (target) {
 225          case GL_VERTEX_PROGRAM_ARB:
 226             cso_delete_vertex_shader(st->cso_context, v->driver_shader);
 227             break;
 228          case GL_TESS_CONTROL_PROGRAM_NV:
 229             cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
 230             break;
 231          case GL_TESS_EVALUATION_PROGRAM_NV:
 232             cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
 233             break;
 234          case GL_GEOMETRY_PROGRAM_NV:
 235             cso_delete_geometry_shader(st->cso_context, v->driver_shader);
 236             break;
 237          case GL_FRAGMENT_PROGRAM_ARB:
 238             cso_delete_fragment_shader(st->cso_context, v->driver_shader);
 239             break;
 240          case GL_COMPUTE_PROGRAM_NV:
 241             cso_delete_compute_shader(st->cso_context, v->driver_shader);
 242             break;
 243          default:
 244             unreachable("bad shader type in delete_basic_variant");
 245          }
 246       } else {
 247          /* We can't delete a shader with a context different from the one
 248           * that created it.  Add it to the creating context's zombie list.
 249           */
 250          enum pipe_shader_type type =
 251             pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
 252
 253          st_save_zombie_shader(v->st, type, v->driver_shader);
 254       }
 255    }
 256
 257    if (target == GL_VERTEX_PROGRAM_ARB) {
 258       struct st_vp_variant *vpv = (struct st_vp_variant *)v;
 259
 260       if (vpv->draw_shader)
 261          draw_delete_vertex_shader( st->draw, vpv->draw_shader );
 262
 263       if (vpv->tokens)
 264          ureg_free_tokens(vpv->tokens);
 265    }
 266
 267    free(v);
 268 }
 269
 270
 271 /**
 272  * Free all basic program variants.
 273  */
 274 void
 275 st_release_variants(struct st_context *st, struct st_program *p)
 276 {
 277    struct st_variant *v;
 278
 279    for (v = p->variants; v; ) {
 280       struct st_variant *next = v->next;
 281       delete_variant(st, v, p->Base.Target);
 282       v = next;
 283    }
 284
 285    p->variants = NULL;
 286
 287    if (p->state.tokens) {
 288       ureg_free_tokens(p->state.tokens);
 289       p->state.tokens = NULL;
 290    }
 291
 292    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 293     * it has resulted in the driver taking ownership of the NIR.  Those
 294     * callers should be NULLing out the nir field in any pipe_shader_state
 295     * that might have this called in order to indicate that.
 296     *
 297     * GLSL IR and ARB programs will have set gl_program->nir to the same
 298     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 299     */
 300 }
 301
 302 void
 303 st_finalize_nir_before_variants(struct nir_shader *nir)
 304 {
 305    NIR_PASS_V(nir, nir_opt_access);
 306
 307    NIR_PASS_V(nir, nir_split_var_copies);
 308    NIR_PASS_V(nir, nir_lower_var_copies);
 309    if (nir->options->lower_all_io_to_temps ||
 310        nir->options->lower_all_io_to_elements ||
 311        nir->info.stage == MESA_SHADER_VERTEX ||
 312        nir->info.stage == MESA_SHADER_GEOMETRY) {
 313       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 314    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 315       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 316    }
 317
 318    st_nir_assign_vs_in_locations(nir);
 319 }
 320
 321 /**
 322  * Translate ARB (asm) program to NIR
 323  */
 324 static nir_shader *
 325 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 326                          gl_shader_stage stage)
 327 {
 328    struct pipe_screen *screen = st->pipe->screen;
 329    const struct gl_shader_compiler_options *options =
 330       &st->ctx->Const.ShaderCompilerOptions[stage];
 331
 332    /* Translate to NIR */
 333    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 334    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 335    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 336
 337    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
 338    NIR_PASS_V(nir, nir_lower_system_values);
 339
 340    /* Optimise NIR */
 341    NIR_PASS_V(nir, nir_opt_constant_folding);
 342    st_nir_opts(nir);
 343    st_finalize_nir_before_variants(nir);
 344
 345    if (st->allow_st_finalize_nir_twice)
 346       st_finalize_nir(st, prog, NULL, nir, true);
 347
 348    nir_validate_shader(nir, "after st/glsl finalize_nir");
 349
 350    return nir;
 351 }
 352
 353 void
 354 st_prepare_vertex_program(struct st_program *stp)
 355 {
 356    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 357
 358    stvp->num_inputs = 0;
 359    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 360    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 361
 362    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 363     * and TGSI generic input indexes, plus input attrib semantic info.
 364     */
 365    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 366       if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 367          stvp->input_to_index[attr] = stvp->num_inputs;
 368          stvp->index_to_input[stvp->num_inputs] = attr;
 369          stvp->num_inputs++;
 370
 371          if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 372             /* add placeholder for second part of a double attribute */
 373             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 374             stvp->num_inputs++;
 375          }
 376       }
 377    }
 378    /* pre-setup potentially unused edgeflag input */
 379    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 380    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 381
 382    /* Compute mapping of vertex program outputs to slots. */
 383    unsigned num_outputs = 0;
 384    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 385       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 386          stvp->result_to_output[attr] = num_outputs++;
 387    }
 388    /* pre-setup potentially unused edgeflag output */
 389    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 390 }
 391
 392 void
 393 st_translate_stream_output_info(struct gl_program *prog)
 394 {
 395    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 396    if (!info)
 397       return;
 398
 399    /* Determine the (default) output register mapping for each output. */
 400    unsigned num_outputs = 0;
 401    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 402    memset(output_mapping, 0, sizeof(output_mapping));
 403
 404    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 405       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 406          output_mapping[attr] = num_outputs++;
 407    }
 408
 409    /* Translate stream output info. */
 410    struct pipe_stream_output_info *so_info =
 411       &((struct st_program*)prog)->state.stream_output;
 412
 413    for (unsigned i = 0; i < info->NumOutputs; i++) {
 414       so_info->output[i].register_index =
 415          output_mapping[info->Outputs[i].OutputRegister];
 416       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 417       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 418       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 419       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 420       so_info->output[i].stream = info->Outputs[i].StreamId;
 421    }
 422
 423    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 424       so_info->stride[i] = info->Buffers[i].Stride;
 425    }
 426    so_info->num_outputs = info->NumOutputs;
 427 }
 428
 429 /**
 430  * Translate a vertex program.
 431  */
 432 bool
 433 st_translate_vertex_program(struct st_context *st,
 434                             struct st_program *stp)
 435 {
 436    struct ureg_program *ureg;
 437    enum pipe_error error;
 438    unsigned num_outputs = 0;
 439    unsigned attr;
 440    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 441    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 442
 443    if (stp->Base.arb.IsPositionInvariant)
 444       _mesa_insert_mvp_code(st->ctx, &stp->Base);
 445
 446    st_prepare_vertex_program(stp);
 447
 448    /* ARB_vp: */
 449    if (!stp->glsl_to_tgsi) {
 450       _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);
 451
 452       /* This determines which states will be updated when the assembly
 453        * shader is bound.
 454        */
 455       stp->affected_states = ST_NEW_VS_STATE |
 456                               ST_NEW_RASTERIZER |
 457                               ST_NEW_VERTEX_ARRAYS;
 458
 459       if (stp->Base.Parameters->NumParameters)
 460          stp->affected_states |= ST_NEW_VS_CONSTANTS;
 461
 462       /* No samplers are allowed in ARB_vp. */
 463    }
 464
 465    /* Get semantic names and indices. */
 466    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 467       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 468          unsigned slot = num_outputs++;
 469          unsigned semantic_name, semantic_index;
 470          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 471                                       &semantic_name, &semantic_index);
 472          output_semantic_name[slot] = semantic_name;
 473          output_semantic_index[slot] = semantic_index;
 474       }
 475    }
 476    /* pre-setup potentially unused edgeflag output */
 477    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 478    output_semantic_index[num_outputs] = 0;
 479
 480    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 481    if (ureg == NULL)
 482       return false;
 483
 484    if (stp->Base.info.clip_distance_array_size)
 485       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 486                     stp->Base.info.clip_distance_array_size);
 487    if (stp->Base.info.cull_distance_array_size)
 488       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 489                     stp->Base.info.cull_distance_array_size);
 490
 491    if (ST_DEBUG & DEBUG_MESA) {
 492       _mesa_print_program(&stp->Base);
 493       _mesa_print_program_parameters(st->ctx, &stp->Base);
 494       debug_printf("\n");
 495    }
 496
 497    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 498
 499    if (stp->glsl_to_tgsi) {
 500       error = st_translate_program(st->ctx,
 501                                    PIPE_SHADER_VERTEX,
 502                                    ureg,
 503                                    stp->glsl_to_tgsi,
 504                                    &stp->Base,
 505                                    /* inputs */
 506                                    stvp->num_inputs,
 507                                    stvp->input_to_index,
 508                                    NULL, /* inputSlotToAttr */
 509                                    NULL, /* input semantic name */
 510                                    NULL, /* input semantic index */
 511                                    NULL, /* interp mode */
 512                                    /* outputs */
 513                                    num_outputs,
 514                                    stvp->result_to_output,
 515                                    output_semantic_name,
 516                                    output_semantic_index);
 517
 518       st_translate_stream_output_info(&stp->Base);
 519
 520       free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
 521    } else
 522       error = st_translate_mesa_program(st->ctx,
 523                                         PIPE_SHADER_VERTEX,
 524                                         ureg,
 525                                         &stp->Base,
 526                                         /* inputs */
 527                                         stvp->num_inputs,
 528                                         stvp->input_to_index,
 529                                         NULL, /* input semantic name */
 530                                         NULL, /* input semantic index */
 531                                         NULL,
 532                                         /* outputs */
 533                                         num_outputs,
 534                                         stvp->result_to_output,
 535                                         output_semantic_name,
 536                                         output_semantic_index);
 537
 538    if (error) {
 539       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 540       _mesa_print_program(&stp->Base);
 541       debug_assert(0);
 542       return false;
 543    }
 544
 545    stp->state.tokens = ureg_get_tokens(ureg, NULL);
 546    ureg_destroy(ureg);
 547
 548    if (stp->glsl_to_tgsi) {
 549       stp->glsl_to_tgsi = NULL;
 550       st_store_ir_in_disk_cache(st, &stp->Base, false);
 551    }
 552
 553    /* Translate to NIR.
 554     *
 555     * This must be done after the translation to TGSI is done, because
 556     * we'll pass the NIR shader to the driver and the TGSI version to
 557     * the draw module for the select/feedback/rasterpos code.
 558     */
 559    if (st->pipe->screen->get_shader_param(st->pipe->screen,
 560                                           PIPE_SHADER_VERTEX,
 561                                           PIPE_SHADER_CAP_PREFERRED_IR)) {
 562       assert(!stp->glsl_to_tgsi);
 563
 564       nir_shader *nir =
 565          st_translate_prog_to_nir(st, &stp->Base, MESA_SHADER_VERTEX);
 566
 567       if (stp->Base.nir)
 568          ralloc_free(stp->Base.nir);
 569       stp->state.type = PIPE_SHADER_IR_NIR;
 570       stp->Base.nir = nir;
 571       return true;
 572    }
 573
 574    return stp->state.tokens != NULL;
 575 }
 576
 577 static const gl_state_index16 depth_range_state[STATE_LENGTH] =
 578    { STATE_DEPTH_RANGE };
 579
 580 static struct st_vp_variant *
 581 st_create_vp_variant(struct st_context *st,
 582                      struct st_program *stvp,
 583                      const struct st_common_variant_key *key)
 584 {
 585    struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
 586    struct pipe_context *pipe = st->pipe;
 587    struct pipe_screen *screen = pipe->screen;
 588    struct pipe_shader_state state = {0};
 589
 590    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 591       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 592    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 593
 594    vpv->key = *key;
 595    vpv->num_inputs = ((struct st_vertex_program*)stvp)->num_inputs;
 596
 597    state.stream_output = stvp->state.stream_output;
 598
 599    if (stvp->state.type == PIPE_SHADER_IR_NIR) {
 600       bool finalize = false;
 601
 602       state.type = PIPE_SHADER_IR_NIR;
 603       state.ir.nir = nir_shader_clone(NULL, stvp->Base.nir);
 604       if (key->clamp_color) {
 605          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 606          finalize = true;
 607       }
 608       if (key->passthrough_edgeflags) {
 609          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 610          vpv->num_inputs++;
 611          finalize = true;
 612       }
 613
 614       if (key->lower_point_size) {
 615          _mesa_add_state_reference(params, point_size_state);
 616          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 617                     point_size_state);
 618          finalize = true;
 619       }
 620
 621       if (key->lower_ucp) {
 622          bool can_compact = screen->get_param(screen,
 623                                               PIPE_CAP_NIR_COMPACT_ARRAYS);
 624
 625          bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 626          gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 627          for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 628             if (use_eye) {
 629                clipplane_state[i][0] = STATE_CLIPPLANE;
 630                clipplane_state[i][1] = i;
 631             } else {
 632                clipplane_state[i][0] = STATE_INTERNAL;
 633                clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 634                clipplane_state[i][2] = i;
 635             }
 636             _mesa_add_state_reference(params, clipplane_state[i]);
 637          }
 638
 639          NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
 640                     true, can_compact, clipplane_state);
 641          NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
 642                     nir_shader_get_entrypoint(state.ir.nir), true, false);
 643          NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
 644          finalize = true;
 645       }
 646
 647       if (finalize || !st->allow_st_finalize_nir_twice) {
 648          st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
 649                          true);
 650
 651          /* Some of the lowering above may have introduced new varyings */
 652          nir_shader_gather_info(state.ir.nir,
 653                                 nir_shader_get_entrypoint(state.ir.nir));
 654       }
 655
 656       if (ST_DEBUG & DEBUG_PRINT_IR)
 657          nir_print_shader(state.ir.nir, stderr);
 658
 659       vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 660
 661       /* When generating a NIR program, we usually don't have TGSI tokens.
 662        * However, we do create them for ARB_vertex_program / fixed-function VS
 663        * programs which we may need to use with the draw module for legacy
 664        * feedback/select emulation.  If they exist, copy them.
 665        *
 666        * TODO: Lowering for shader variants is not applied to TGSI when
 667        * generating a NIR shader.
 668        */
 669       if (stvp->state.tokens)
 670          vpv->tokens = tgsi_dup_tokens(stvp->state.tokens);
 671
 672       return vpv;
 673    }
 674
 675    state.type = PIPE_SHADER_IR_TGSI;
 676    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 677
 678    /* Emulate features. */
 679    if (key->clamp_color || key->passthrough_edgeflags) {
 680       const struct tgsi_token *tokens;
 681       unsigned flags =
 682          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 683          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 684
 685       tokens = tgsi_emulate(state.tokens, flags);
 686
 687       if (tokens) {
 688          tgsi_free_tokens(state.tokens);
 689          state.tokens = tokens;
 690
 691          if (key->passthrough_edgeflags)
 692             vpv->num_inputs++;
 693       } else
 694          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 695    }
 696
 697    if (key->lower_depth_clamp) {
 698       unsigned depth_range_const =
 699             _mesa_add_state_reference(params, depth_range_state);
 700
 701       const struct tgsi_token *tokens;
 702       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 703                                          key->clip_negative_one_to_one);
 704       if (tokens != state.tokens)
 705          tgsi_free_tokens(state.tokens);
 706       state.tokens = tokens;
 707    }
 708
 709    if (ST_DEBUG & DEBUG_PRINT_IR)
 710       tgsi_dump(state.tokens, 0);
 711
 712    vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 713    /* Save this for selection/feedback/rasterpos. */
 714    vpv->tokens = state.tokens;
 715    return vpv;
 716 }
 717
 718
 719 /**
 720  * Find/create a vertex program variant.
 721  */
 722 struct st_vp_variant *
 723 st_get_vp_variant(struct st_context *st,
 724                   struct st_program *stp,
 725                   const struct st_common_variant_key *key)
 726 {
 727    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 728    struct st_vp_variant *vpv;
 729
 730    /* Search for existing variant */
 731    for (vpv = st_vp_variant(stp->variants); vpv;
 732         vpv = st_vp_variant(vpv->base.next)) {
 733       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 734          break;
 735       }
 736    }
 737
 738    if (!vpv) {
 739       /* create now */
 740       vpv = st_create_vp_variant(st, stp, key);
 741       if (vpv) {
 742          vpv->base.st = key->st;
 743
 744          for (unsigned index = 0; index < vpv->num_inputs; ++index) {
 745             unsigned attr = stvp->index_to_input[index];
 746             if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 747                continue;
 748             vpv->vert_attrib_mask |= 1u << attr;
 749          }
 750
 751          /* insert into list */
 752          vpv->base.next = stp->variants;
 753          stp->variants = &vpv->base;
 754       }
 755    }
 756
 757    return vpv;
 758 }
 759
 760
 761 /**
 762  * Translate a Mesa fragment shader into a TGSI shader.
 763  */
 764 bool
 765 st_translate_fragment_program(struct st_context *st,
 766                               struct st_program *stfp)
 767 {
 768    /* Non-GLSL programs: */
 769    if (!stfp->glsl_to_tgsi) {
 770       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 771       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 772          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 773
 774       /* This determines which states will be updated when the assembly
 775        * shader is bound.
 776        *
 777        * fragment.position and glDrawPixels always use constants.
 778        */
 779       stfp->affected_states = ST_NEW_FS_STATE |
 780                               ST_NEW_SAMPLE_SHADING |
 781                               ST_NEW_FS_CONSTANTS;
 782
 783       if (stfp->ati_fs) {
 784          /* Just set them for ATI_fs unconditionally. */
 785          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 786                                   ST_NEW_FS_SAMPLERS;
 787       } else {
 788          /* ARB_fp */
 789          if (stfp->Base.SamplersUsed)
 790             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 791                                      ST_NEW_FS_SAMPLERS;
 792       }
 793
 794       /* Translate to NIR. */
 795       if (!stfp->ati_fs &&
 796           st->pipe->screen->get_shader_param(st->pipe->screen,
 797                                              PIPE_SHADER_FRAGMENT,
 798                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 799          nir_shader *nir =
 800             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 801
 802          if (stfp->Base.nir)
 803             ralloc_free(stfp->Base.nir);
 804          stfp->state.type = PIPE_SHADER_IR_NIR;
 805          stfp->Base.nir = nir;
 806          return true;
 807       }
 808    }
 809
 810    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 811    ubyte inputMapping[VARYING_SLOT_MAX];
 812    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 813    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 814    GLuint attr;
 815    GLbitfield64 inputsRead;
 816    struct ureg_program *ureg;
 817
 818    GLboolean write_all = GL_FALSE;
 819
 820    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 821    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 822    uint fs_num_inputs = 0;
 823
 824    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 825    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 826    uint fs_num_outputs = 0;
 827
 828    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 829
 830    /*
 831     * Convert Mesa program inputs to TGSI input register semantics.
 832     */
 833    inputsRead = stfp->Base.info.inputs_read;
 834    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 835       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 836          const GLuint slot = fs_num_inputs++;
 837
 838          inputMapping[attr] = slot;
 839          inputSlotToAttr[slot] = attr;
 840
 841          switch (attr) {
 842          case VARYING_SLOT_POS:
 843             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 844             input_semantic_index[slot] = 0;
 845             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 846             break;
 847          case VARYING_SLOT_COL0:
 848             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 849             input_semantic_index[slot] = 0;
 850             interpMode[slot] = stfp->glsl_to_tgsi ?
 851                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 852             break;
 853          case VARYING_SLOT_COL1:
 854             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 855             input_semantic_index[slot] = 1;
 856             interpMode[slot] = stfp->glsl_to_tgsi ?
 857                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 858             break;
 859          case VARYING_SLOT_FOGC:
 860             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 861             input_semantic_index[slot] = 0;
 862             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 863             break;
 864          case VARYING_SLOT_FACE:
 865             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 866             input_semantic_index[slot] = 0;
 867             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 868             break;
 869          case VARYING_SLOT_PRIMITIVE_ID:
 870             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 871             input_semantic_index[slot] = 0;
 872             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 873             break;
 874          case VARYING_SLOT_LAYER:
 875             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 876             input_semantic_index[slot] = 0;
 877             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 878             break;
 879          case VARYING_SLOT_VIEWPORT:
 880             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 881             input_semantic_index[slot] = 0;
 882             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 883             break;
 884          case VARYING_SLOT_CLIP_DIST0:
 885             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 886             input_semantic_index[slot] = 0;
 887             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 888             break;
 889          case VARYING_SLOT_CLIP_DIST1:
 890             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 891             input_semantic_index[slot] = 1;
 892             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 893             break;
 894          case VARYING_SLOT_CULL_DIST0:
 895          case VARYING_SLOT_CULL_DIST1:
 896             /* these should have been lowered by GLSL */
 897             assert(0);
 898             break;
 899             /* In most cases, there is nothing special about these
 900              * inputs, so adopt a convention to use the generic
 901              * semantic name and the mesa VARYING_SLOT_ number as the
 902              * index.
 903              *
 904              * All that is required is that the vertex shader labels
 905              * its own outputs similarly, and that the vertex shader
 906              * generates at least every output required by the
 907              * fragment shader plus fixed-function hardware (such as
 908              * BFC).
 909              *
 910              * However, some drivers may need us to identify the PNTC and TEXi
 911              * varyings if, for example, their capability to replace them with
 912              * sprite coordinates is limited.
 913              */
 914          case VARYING_SLOT_PNTC:
 915             if (st->needs_texcoord_semantic) {
 916                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
 917                input_semantic_index[slot] = 0;
 918                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 919                break;
 920             }
 921             /* fall through */
 922          case VARYING_SLOT_TEX0:
 923          case VARYING_SLOT_TEX1:
 924          case VARYING_SLOT_TEX2:
 925          case VARYING_SLOT_TEX3:
 926          case VARYING_SLOT_TEX4:
 927          case VARYING_SLOT_TEX5:
 928          case VARYING_SLOT_TEX6:
 929          case VARYING_SLOT_TEX7:
 930             if (st->needs_texcoord_semantic) {
 931                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
 932                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
 933                interpMode[slot] = stfp->glsl_to_tgsi ?
 934                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 935                break;
 936             }
 937             /* fall through */
 938          case VARYING_SLOT_VAR0:
 939          default:
 940             /* Semantic indices should be zero-based because drivers may choose
 941              * to assign a fixed slot determined by that index.
 942              * This is useful because ARB_separate_shader_objects uses location
 943              * qualifiers for linkage, and if the semantic index corresponds to
 944              * these locations, linkage passes in the driver become unecessary.
 945              *
 946              * If needs_texcoord_semantic is true, no semantic indices will be
 947              * consumed for the TEXi varyings, and we can base the locations of
 948              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
 949              */
 950             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
 951                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
 952             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
 953             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
 954             if (attr == VARYING_SLOT_PNTC)
 955                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 956             else {
 957                interpMode[slot] = stfp->glsl_to_tgsi ?
 958                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 959             }
 960             break;
 961          }
 962       }
 963       else {
 964          inputMapping[attr] = -1;
 965       }
 966    }
 967
 968    /*
 969     * Semantics and mapping for outputs
 970     */
 971    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
 972
 973    /* if z is written, emit that first */
 974    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
 975       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
 976       fs_output_semantic_index[fs_num_outputs] = 0;
 977       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
 978       fs_num_outputs++;
 979       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
 980    }
 981
 982    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
 983       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
 984       fs_output_semantic_index[fs_num_outputs] = 0;
 985       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
 986       fs_num_outputs++;
 987       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
 988    }
 989
 990    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
 991       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
 992       fs_output_semantic_index[fs_num_outputs] = 0;
 993       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
 994       fs_num_outputs++;
 995       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
 996    }
 997
 998    /* handle remaining outputs (color) */
 999    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
1000       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
1001          stfp->Base.SecondaryOutputsWritten;
1002       const unsigned loc = attr % FRAG_RESULT_MAX;
1003
1004       if (written & BITFIELD64_BIT(loc)) {
1005          switch (loc) {
1006          case FRAG_RESULT_DEPTH:
1007          case FRAG_RESULT_STENCIL:
1008          case FRAG_RESULT_SAMPLE_MASK:
1009             /* handled above */
1010             assert(0);
1011             break;
1012          case FRAG_RESULT_COLOR:
1013             write_all = GL_TRUE; /* fallthrough */
1014          default: {
1015             int index;
1016             assert(loc == FRAG_RESULT_COLOR ||
1017                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1018
1019             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1020
1021             if (attr >= FRAG_RESULT_MAX) {
1022                /* Secondary color for dual source blending. */
1023                assert(index == 0);
1024                index++;
1025             }
1026
1027             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1028             fs_output_semantic_index[fs_num_outputs] = index;
1029             outputMapping[attr] = fs_num_outputs;
1030             break;
1031          }
1032          }
1033
1034          fs_num_outputs++;
1035       }
1036    }
1037
1038    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1039    if (ureg == NULL)
1040       return false;
1041
1042    if (ST_DEBUG & DEBUG_MESA) {
1043       _mesa_print_program(&stfp->Base);
1044       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1045       debug_printf("\n");
1046    }
1047    if (write_all == GL_TRUE)
1048       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1049
1050    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1051       switch (stfp->Base.info.fs.depth_layout) {
1052       case FRAG_DEPTH_LAYOUT_ANY:
1053          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1054                        TGSI_FS_DEPTH_LAYOUT_ANY);
1055          break;
1056       case FRAG_DEPTH_LAYOUT_GREATER:
1057          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1058                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1059          break;
1060       case FRAG_DEPTH_LAYOUT_LESS:
1061          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1062                        TGSI_FS_DEPTH_LAYOUT_LESS);
1063          break;
1064       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1065          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1066                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1067          break;
1068       default:
1069          assert(0);
1070       }
1071    }
1072
1073    if (stfp->glsl_to_tgsi) {
1074       st_translate_program(st->ctx,
1075                            PIPE_SHADER_FRAGMENT,
1076                            ureg,
1077                            stfp->glsl_to_tgsi,
1078                            &stfp->Base,
1079                            /* inputs */
1080                            fs_num_inputs,
1081                            inputMapping,
1082                            inputSlotToAttr,
1083                            input_semantic_name,
1084                            input_semantic_index,
1085                            interpMode,
1086                            /* outputs */
1087                            fs_num_outputs,
1088                            outputMapping,
1089                            fs_output_semantic_name,
1090                            fs_output_semantic_index);
1091
1092       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1093    } else if (stfp->ati_fs)
1094       st_translate_atifs_program(ureg,
1095                                  stfp->ati_fs,
1096                                  &stfp->Base,
1097                                  /* inputs */
1098                                  fs_num_inputs,
1099                                  inputMapping,
1100                                  input_semantic_name,
1101                                  input_semantic_index,
1102                                  interpMode,
1103                                  /* outputs */
1104                                  fs_num_outputs,
1105                                  outputMapping,
1106                                  fs_output_semantic_name,
1107                                  fs_output_semantic_index);
1108    else
1109       st_translate_mesa_program(st->ctx,
1110                                 PIPE_SHADER_FRAGMENT,
1111                                 ureg,
1112                                 &stfp->Base,
1113                                 /* inputs */
1114                                 fs_num_inputs,
1115                                 inputMapping,
1116                                 input_semantic_name,
1117                                 input_semantic_index,
1118                                 interpMode,
1119                                 /* outputs */
1120                                 fs_num_outputs,
1121                                 outputMapping,
1122                                 fs_output_semantic_name,
1123                                 fs_output_semantic_index);
1124
1125    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1126    ureg_destroy(ureg);
1127
1128    if (stfp->glsl_to_tgsi) {
1129       stfp->glsl_to_tgsi = NULL;
1130       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1131    }
1132
1133    return stfp->state.tokens != NULL;
1134 }
1135
1136 static struct st_fp_variant *
1137 st_create_fp_variant(struct st_context *st,
1138                      struct st_program *stfp,
1139                      const struct st_fp_variant_key *key)
1140 {
1141    struct pipe_context *pipe = st->pipe;
1142    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1143    struct pipe_shader_state state = {0};
1144    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1145    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1146       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1147    static const gl_state_index16 scale_state[STATE_LENGTH] =
1148       { STATE_INTERNAL, STATE_PT_SCALE };
1149    static const gl_state_index16 bias_state[STATE_LENGTH] =
1150       { STATE_INTERNAL, STATE_PT_BIAS };
1151    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1152       { STATE_INTERNAL, STATE_ALPHA_REF };
1153
1154    if (!variant)
1155       return NULL;
1156
1157    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1158       bool finalize = false;
1159
1160       state.type = PIPE_SHADER_IR_NIR;
1161       state.ir.nir = nir_shader_clone(NULL, stfp->Base.nir);
1162
1163       if (key->clamp_color) {
1164          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1165          finalize = true;
1166       }
1167
1168       if (key->lower_flatshade) {
1169          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1170          finalize = true;
1171       }
1172
1173       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1174          _mesa_add_state_reference(params, alpha_ref_state);
1175          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1176                     false, alpha_ref_state);
1177          finalize = true;
1178       }
1179
1180       if (key->lower_two_sided_color) {
1181          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
1182          finalize = true;
1183       }
1184
1185       if (key->persample_shading) {
1186           nir_shader *shader = state.ir.nir;
1187           nir_foreach_variable(var, &shader->inputs)
1188              var->data.sample = true;
1189           finalize = true;
1190       }
1191
1192       assert(!(key->bitmap && key->drawpixels));
1193
1194       /* glBitmap */
1195       if (key->bitmap) {
1196          nir_lower_bitmap_options options = {0};
1197
1198          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1199          options.sampler = variant->bitmap_sampler;
1200          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1201
1202          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1203          finalize = true;
1204       }
1205
1206       /* glDrawPixels (color only) */
1207       if (key->drawpixels) {
1208          nir_lower_drawpixels_options options = {{0}};
1209          unsigned samplers_used = stfp->Base.SamplersUsed;
1210
1211          /* Find the first unused slot. */
1212          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1213          options.drawpix_sampler = variant->drawpix_sampler;
1214          samplers_used |= (1 << variant->drawpix_sampler);
1215
1216          options.pixel_maps = key->pixelMaps;
1217          if (key->pixelMaps) {
1218             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1219             options.pixelmap_sampler = variant->pixelmap_sampler;
1220          }
1221
1222          options.scale_and_bias = key->scaleAndBias;
1223          if (key->scaleAndBias) {
1224             _mesa_add_state_reference(params, scale_state);
1225             memcpy(options.scale_state_tokens, scale_state,
1226                    sizeof(options.scale_state_tokens));
1227             _mesa_add_state_reference(params, bias_state);
1228             memcpy(options.bias_state_tokens, bias_state,
1229                    sizeof(options.bias_state_tokens));
1230          }
1231
1232          _mesa_add_state_reference(params, texcoord_state);
1233          memcpy(options.texcoord_state_tokens, texcoord_state,
1234                 sizeof(options.texcoord_state_tokens));
1235
1236          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1237          finalize = true;
1238       }
1239
1240       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1241                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1242                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1243          nir_lower_tex_options options = {0};
1244          options.lower_y_uv_external = key->external.lower_nv12;
1245          options.lower_y_u_v_external = key->external.lower_iyuv;
1246          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1247          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1248          options.lower_ayuv_external = key->external.lower_ayuv;
1249          options.lower_xyuv_external = key->external.lower_xyuv;
1250          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1251          finalize = true;
1252       }
1253
1254       if (finalize || !st->allow_st_finalize_nir_twice) {
1255          st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
1256                          false);
1257       }
1258
1259       /* This pass needs to happen *after* nir_lower_sampler */
1260       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1261                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1262          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1263                     ~stfp->Base.SamplersUsed,
1264                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1265                        key->external.lower_yx_xuxv,
1266                     key->external.lower_iyuv);
1267          finalize = true;
1268       }
1269
1270       if (finalize || !st->allow_st_finalize_nir_twice) {
1271          /* Some of the lowering above may have introduced new varyings */
1272          nir_shader_gather_info(state.ir.nir,
1273                                 nir_shader_get_entrypoint(state.ir.nir));
1274
1275          struct pipe_screen *screen = pipe->screen;
1276          if (screen->finalize_nir)
1277             screen->finalize_nir(screen, state.ir.nir, false);
1278       }
1279
1280       if (ST_DEBUG & DEBUG_PRINT_IR)
1281          nir_print_shader(state.ir.nir, stderr);
1282
1283       variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1284       variant->key = *key;
1285
1286       return variant;
1287    }
1288
1289    state.tokens = stfp->state.tokens;
1290
1291    assert(!(key->bitmap && key->drawpixels));
1292
1293    /* Fix texture targets and add fog for ATI_fs */
1294    if (stfp->ati_fs) {
1295       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1296
1297       if (tokens)
1298          state.tokens = tokens;
1299       else
1300          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1301    }
1302
1303    /* Emulate features. */
1304    if (key->clamp_color || key->persample_shading) {
1305       const struct tgsi_token *tokens;
1306       unsigned flags =
1307          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1308          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1309
1310       tokens = tgsi_emulate(state.tokens, flags);
1311
1312       if (tokens) {
1313          if (state.tokens != stfp->state.tokens)
1314             tgsi_free_tokens(state.tokens);
1315          state.tokens = tokens;
1316       } else
1317          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1318    }
1319
1320    /* glBitmap */
1321    if (key->bitmap) {
1322       const struct tgsi_token *tokens;
1323
1324       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1325
1326       tokens = st_get_bitmap_shader(state.tokens,
1327                                     st->internal_target,
1328                                     variant->bitmap_sampler,
1329                                     st->needs_texcoord_semantic,
1330                                     st->bitmap.tex_format ==
1331                                     PIPE_FORMAT_R8_UNORM);
1332
1333       if (tokens) {
1334          if (state.tokens != stfp->state.tokens)
1335             tgsi_free_tokens(state.tokens);
1336          state.tokens = tokens;
1337       } else
1338          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1339    }
1340
1341    /* glDrawPixels (color only) */
1342    if (key->drawpixels) {
1343       const struct tgsi_token *tokens;
1344       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1345
1346       /* Find the first unused slot. */
1347       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1348
1349       if (key->pixelMaps) {
1350          unsigned samplers_used = stfp->Base.SamplersUsed |
1351                                   (1 << variant->drawpix_sampler);
1352
1353          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1354       }
1355
1356       if (key->scaleAndBias) {
1357          scale_const = _mesa_add_state_reference(params, scale_state);
1358          bias_const = _mesa_add_state_reference(params, bias_state);
1359       }
1360
1361       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1362
1363       tokens = st_get_drawpix_shader(state.tokens,
1364                                      st->needs_texcoord_semantic,
1365                                      key->scaleAndBias, scale_const,
1366                                      bias_const, key->pixelMaps,
1367                                      variant->drawpix_sampler,
1368                                      variant->pixelmap_sampler,
1369                                      texcoord_const, st->internal_target);
1370
1371       if (tokens) {
1372          if (state.tokens != stfp->state.tokens)
1373             tgsi_free_tokens(state.tokens);
1374          state.tokens = tokens;
1375       } else
1376          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1377    }
1378
1379    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1380                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1381       const struct tgsi_token *tokens;
1382
1383       /* samplers inserted would conflict, but this should be unpossible: */
1384       assert(!(key->bitmap || key->drawpixels));
1385
1386       tokens = st_tgsi_lower_yuv(state.tokens,
1387                                  ~stfp->Base.SamplersUsed,
1388                                  key->external.lower_nv12 ||
1389                                     key->external.lower_xy_uxvx ||
1390                                     key->external.lower_yx_xuxv,
1391                                  key->external.lower_iyuv);
1392       if (tokens) {
1393          if (state.tokens != stfp->state.tokens)
1394             tgsi_free_tokens(state.tokens);
1395          state.tokens = tokens;
1396       } else {
1397          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1398       }
1399    }
1400
1401    if (key->lower_depth_clamp) {
1402       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1403
1404       const struct tgsi_token *tokens;
1405       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1406       if (state.tokens != stfp->state.tokens)
1407          tgsi_free_tokens(state.tokens);
1408       state.tokens = tokens;
1409    }
1410
1411    if (ST_DEBUG & DEBUG_PRINT_IR)
1412       tgsi_dump(state.tokens, 0);
1413
1414    /* fill in variant */
1415    variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1416    variant->key = *key;
1417
1418    if (state.tokens != stfp->state.tokens)
1419       tgsi_free_tokens(state.tokens);
1420    return variant;
1421 }
1422
1423 /**
1424  * Translate fragment program if needed.
1425  */
1426 struct st_fp_variant *
1427 st_get_fp_variant(struct st_context *st,
1428                   struct st_program *stfp,
1429                   const struct st_fp_variant_key *key)
1430 {
1431    struct st_fp_variant *fpv;
1432
1433    /* Search for existing variant */
1434    for (fpv = st_fp_variant(stfp->variants); fpv;
1435         fpv = st_fp_variant(fpv->base.next)) {
1436       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1437          break;
1438       }
1439    }
1440
1441    if (!fpv) {
1442       /* create new */
1443       fpv = st_create_fp_variant(st, stfp, key);
1444       if (fpv) {
1445          fpv->base.st = key->st;
1446
1447          if (key->bitmap || key->drawpixels) {
1448             /* Regular variants should always come before the
1449              * bitmap & drawpixels variants, (unless there
1450              * are no regular variants) so that
1451              * st_update_fp can take a fast path when
1452              * shader_has_one_variant is set.
1453              */
1454             if (!stfp->variants) {
1455                stfp->variants = &fpv->base;
1456             } else {
1457                /* insert into list after the first one */
1458                fpv->base.next = stfp->variants->next;
1459                stfp->variants->next = &fpv->base;
1460             }
1461          } else {
1462             /* insert into list */
1463             fpv->base.next = stfp->variants;
1464             stfp->variants = &fpv->base;
1465          }
1466       }
1467    }
1468
1469    return fpv;
1470 }
1471
1472 /**
1473  * Translate a program. This is common code for geometry and tessellation
1474  * shaders.
1475  */
1476 bool
1477 st_translate_common_program(struct st_context *st,
1478                             struct st_program *stp)
1479 {
1480    struct gl_program *prog = &stp->Base;
1481    enum pipe_shader_type stage =
1482       pipe_shader_type_from_mesa(stp->Base.info.stage);
1483    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1484
1485    if (ureg == NULL)
1486       return false;
1487
1488    switch (stage) {
1489    case PIPE_SHADER_TESS_CTRL:
1490       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1491                     stp->Base.info.tess.tcs_vertices_out);
1492       break;
1493
1494    case PIPE_SHADER_TESS_EVAL:
1495       if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
1496          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1497       else
1498          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1499                        stp->Base.info.tess.primitive_mode);
1500
1501       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1502       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1503                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1504       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1505                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1506
1507       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1508                     (stp->Base.info.tess.spacing + 1) % 3);
1509
1510       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1511                     !stp->Base.info.tess.ccw);
1512       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1513                     stp->Base.info.tess.point_mode);
1514       break;
1515
1516    case PIPE_SHADER_GEOMETRY:
1517       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1518                     stp->Base.info.gs.input_primitive);
1519       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1520                     stp->Base.info.gs.output_primitive);
1521       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1522                     stp->Base.info.gs.vertices_out);
1523       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1524                     stp->Base.info.gs.invocations);
1525       break;
1526
1527    default:
1528       break;
1529    }
1530
1531    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1532    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1533    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1534    GLuint attr;
1535
1536    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1537    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1538    uint num_inputs = 0;
1539
1540    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1541    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1542    uint num_outputs = 0;
1543
1544    GLint i;
1545
1546    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1547    memset(inputMapping, 0, sizeof(inputMapping));
1548    memset(outputMapping, 0, sizeof(outputMapping));
1549    memset(&stp->state, 0, sizeof(stp->state));
1550
1551    if (prog->info.clip_distance_array_size)
1552       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1553                     prog->info.clip_distance_array_size);
1554    if (prog->info.cull_distance_array_size)
1555       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1556                     prog->info.cull_distance_array_size);
1557
1558    /*
1559     * Convert Mesa program inputs to TGSI input register semantics.
1560     */
1561    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1562       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1563          continue;
1564
1565       unsigned slot = num_inputs++;
1566
1567       inputMapping[attr] = slot;
1568       inputSlotToAttr[slot] = attr;
1569
1570       unsigned semantic_name, semantic_index;
1571       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1572                                    &semantic_name, &semantic_index);
1573       input_semantic_name[slot] = semantic_name;
1574       input_semantic_index[slot] = semantic_index;
1575    }
1576
1577    /* Also add patch inputs. */
1578    for (attr = 0; attr < 32; attr++) {
1579       if (prog->info.patch_inputs_read & (1u << attr)) {
1580          GLuint slot = num_inputs++;
1581          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1582
1583          inputMapping[patch_attr] = slot;
1584          inputSlotToAttr[slot] = patch_attr;
1585          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1586          input_semantic_index[slot] = attr;
1587       }
1588    }
1589
1590    /* initialize output semantics to defaults */
1591    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1592       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1593       output_semantic_index[i] = 0;
1594    }
1595
1596    /*
1597     * Determine number of outputs, the (default) output register
1598     * mapping and the semantic information for each output.
1599     */
1600    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1601       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1602          GLuint slot = num_outputs++;
1603
1604          outputMapping[attr] = slot;
1605
1606          unsigned semantic_name, semantic_index;
1607          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1608                                       &semantic_name, &semantic_index);
1609          output_semantic_name[slot] = semantic_name;
1610          output_semantic_index[slot] = semantic_index;
1611       }
1612    }
1613
1614    /* Also add patch outputs. */
1615    for (attr = 0; attr < 32; attr++) {
1616       if (prog->info.patch_outputs_written & (1u << attr)) {
1617          GLuint slot = num_outputs++;
1618          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1619
1620          outputMapping[patch_attr] = slot;
1621          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1622          output_semantic_index[slot] = attr;
1623       }
1624    }
1625
1626    st_translate_program(st->ctx,
1627                         stage,
1628                         ureg,
1629                         stp->glsl_to_tgsi,
1630                         prog,
1631                         /* inputs */
1632                         num_inputs,
1633                         inputMapping,
1634                         inputSlotToAttr,
1635                         input_semantic_name,
1636                         input_semantic_index,
1637                         NULL,
1638                         /* outputs */
1639                         num_outputs,
1640                         outputMapping,
1641                         output_semantic_name,
1642                         output_semantic_index);
1643
1644    stp->state.tokens = ureg_get_tokens(ureg, NULL);
1645
1646    ureg_destroy(ureg);
1647
1648    st_translate_stream_output_info(prog);
1649
1650    st_store_ir_in_disk_cache(st, prog, false);
1651
1652    if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1653       _mesa_print_program(prog);
1654
1655    free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1656    stp->glsl_to_tgsi = NULL;
1657    return true;
1658 }
1659
1660
1661 /**
1662  * Get/create a basic program variant.
1663  */
1664 struct st_variant *
1665 st_get_common_variant(struct st_context *st,
1666                       struct st_program *prog,
1667                       const struct st_common_variant_key *key)
1668 {
1669    struct pipe_context *pipe = st->pipe;
1670    struct st_variant *v;
1671    struct pipe_shader_state state = {0};
1672
1673    /* Search for existing variant */
1674    for (v = prog->variants; v; v = v->next) {
1675       if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
1676          break;
1677    }
1678
1679    if (!v) {
1680       /* create new */
1681       v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
1682       if (v) {
1683          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1684             bool finalize = false;
1685
1686             state.type = PIPE_SHADER_IR_NIR;
1687             state.ir.nir = nir_shader_clone(NULL, prog->Base.nir);
1688
1689             if (key->clamp_color) {
1690                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1691                finalize = true;
1692             }
1693
1694             state.stream_output = prog->state.stream_output;
1695
1696             if (finalize || !st->allow_st_finalize_nir_twice) {
1697                st_finalize_nir(st, &prog->Base, prog->shader_program,
1698                                state.ir.nir, true);
1699             }
1700
1701             if (ST_DEBUG & DEBUG_PRINT_IR)
1702                nir_print_shader(state.ir.nir, stderr);
1703          } else {
1704             if (key->lower_depth_clamp) {
1705                struct gl_program_parameter_list *params = prog->Base.Parameters;
1706
1707                unsigned depth_range_const =
1708                      _mesa_add_state_reference(params, depth_range_state);
1709
1710                const struct tgsi_token *tokens;
1711                tokens =
1712                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1713                                                depth_range_const,
1714                                                key->clip_negative_one_to_one);
1715
1716                if (tokens != prog->state.tokens)
1717                   tgsi_free_tokens(prog->state.tokens);
1718
1719                prog->state.tokens = tokens;
1720             }
1721             state = prog->state;
1722
1723             if (ST_DEBUG & DEBUG_PRINT_IR)
1724                tgsi_dump(state.tokens, 0);
1725          }
1726          /* fill in new variant */
1727          switch (prog->Base.info.stage) {
1728          case MESA_SHADER_TESS_CTRL:
1729             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1730             break;
1731          case MESA_SHADER_TESS_EVAL:
1732             v->driver_shader = pipe->create_tes_state(pipe, &state);
1733             break;
1734          case MESA_SHADER_GEOMETRY:
1735             v->driver_shader = pipe->create_gs_state(pipe, &state);
1736             break;
1737          case MESA_SHADER_COMPUTE: {
1738             struct pipe_compute_state cs = {0};
1739             cs.ir_type = state.type;
1740             cs.req_local_mem = prog->Base.info.cs.shared_size;
1741
1742             if (state.type == PIPE_SHADER_IR_NIR)
1743                cs.prog = state.ir.nir;
1744             else
1745                cs.prog = state.tokens;
1746
1747             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1748             break;
1749          }
1750          default:
1751             assert(!"unhandled shader type");
1752             free(v);
1753             return NULL;
1754          }
1755
1756          st_common_variant(v)->key = *key;
1757          v->st = key->st;
1758
1759          /* insert into list */
1760          v->next = prog->variants;
1761          prog->variants = v;
1762       }
1763    }
1764
1765    return v;
1766 }
1767
1768
1769 /**
1770  * Vert/Geom/Frag programs have per-context variants.  Free all the
1771  * variants attached to the given program which match the given context.
1772  */
1773 static void
1774 destroy_program_variants(struct st_context *st, struct gl_program *target)
1775 {
1776    if (!target || target == &_mesa_DummyProgram)
1777       return;
1778
1779    struct st_program *p = st_program(target);
1780    struct st_variant *v, **prevPtr = &p->variants;
1781
1782    for (v = p->variants; v; ) {
1783       struct st_variant *next = v->next;
1784       if (v->st == st) {
1785          /* unlink from list */
1786          *prevPtr = next;
1787          /* destroy this variant */
1788          delete_variant(st, v, target->Target);
1789       }
1790       else {
1791          prevPtr = &v->next;
1792       }
1793       v = next;
1794    }
1795 }
1796
1797
1798 /**
1799  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1800  * which match the given context.
1801  */
1802 static void
1803 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1804 {
1805    struct st_context *st = (struct st_context *) userData;
1806    struct gl_shader *shader = (struct gl_shader *) data;
1807
1808    switch (shader->Type) {
1809    case GL_SHADER_PROGRAM_MESA:
1810       {
1811          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1812          GLuint i;
1813
1814          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1815             if (shProg->_LinkedShaders[i])
1816                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1817          }
1818       }
1819       break;
1820    case GL_VERTEX_SHADER:
1821    case GL_FRAGMENT_SHADER:
1822    case GL_GEOMETRY_SHADER:
1823    case GL_TESS_CONTROL_SHADER:
1824    case GL_TESS_EVALUATION_SHADER:
1825    case GL_COMPUTE_SHADER:
1826       break;
1827    default:
1828       assert(0);
1829    }
1830 }
1831
1832
1833 /**
1834  * Callback for _mesa_HashWalk.  Free all the program variants which match
1835  * the given context.
1836  */
1837 static void
1838 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1839 {
1840    struct st_context *st = (struct st_context *) userData;
1841    struct gl_program *program = (struct gl_program *) data;
1842    destroy_program_variants(st, program);
1843 }
1844
1845
1846 /**
1847  * Walk over all shaders and programs to delete any variants which
1848  * belong to the given context.
1849  * This is called during context tear-down.
1850  */
1851 void
1852 st_destroy_program_variants(struct st_context *st)
1853 {
1854    /* If shaders can be shared with other contexts, the last context will
1855     * call DeleteProgram on all shaders, releasing everything.
1856     */
1857    if (st->has_shareable_shaders)
1858       return;
1859
1860    /* ARB vert/frag program */
1861    _mesa_HashWalk(st->ctx->Shared->Programs,
1862                   destroy_program_variants_cb, st);
1863
1864    /* GLSL vert/frag/geom shaders */
1865    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1866                   destroy_shader_program_variants_cb, st);
1867 }
1868
1869
1870 /**
1871  * Compile one shader variant.
1872  */
1873 static void
1874 st_precompile_shader_variant(struct st_context *st,
1875                              struct gl_program *prog)
1876 {
1877    switch (prog->Target) {
1878    case GL_VERTEX_PROGRAM_ARB: {
1879       struct st_program *p = (struct st_program *)prog;
1880       struct st_common_variant_key key;
1881
1882       memset(&key, 0, sizeof(key));
1883
1884       key.st = st->has_shareable_shaders ? NULL : st;
1885       st_get_vp_variant(st, p, &key);
1886       break;
1887    }
1888
1889    case GL_FRAGMENT_PROGRAM_ARB: {
1890       struct st_program *p = (struct st_program *)prog;
1891       struct st_fp_variant_key key;
1892
1893       memset(&key, 0, sizeof(key));
1894
1895       key.st = st->has_shareable_shaders ? NULL : st;
1896       st_get_fp_variant(st, p, &key);
1897       break;
1898    }
1899
1900    case GL_TESS_CONTROL_PROGRAM_NV:
1901    case GL_TESS_EVALUATION_PROGRAM_NV:
1902    case GL_GEOMETRY_PROGRAM_NV:
1903    case GL_COMPUTE_PROGRAM_NV: {
1904       struct st_program *p = st_program(prog);
1905       struct st_common_variant_key key;
1906
1907       memset(&key, 0, sizeof(key));
1908
1909       key.st = st->has_shareable_shaders ? NULL : st;
1910       st_get_common_variant(st, p, &key);
1911       break;
1912    }
1913
1914    default:
1915       assert(0);
1916    }
1917 }
1918
1919 void
1920 st_finalize_program(struct st_context *st, struct gl_program *prog)
1921 {
1922    if (st->current_program[prog->info.stage] == prog) {
1923       if (prog->info.stage == MESA_SHADER_VERTEX)
1924          st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
1925       else
1926          st->dirty |= ((struct st_program *)prog)->affected_states;
1927    }
1928
1929    if (prog->nir)
1930       nir_sweep(prog->nir);
1931
1932    /* Create Gallium shaders now instead of on demand. */
1933    if (ST_DEBUG & DEBUG_PRECOMPILE ||
1934        st->shader_has_one_variant[prog->info.stage])
1935       st_precompile_shader_variant(st, prog);
1936 }