src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35 #include "main/imports.h"
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44
  45 #include "pipe/p_context.h"
  46 #include "pipe/p_defines.h"
  47 #include "pipe/p_shader_tokens.h"
  48 #include "draw/draw_context.h"
  49 #include "tgsi/tgsi_dump.h"
  50 #include "tgsi/tgsi_emulate.h"
  51 #include "tgsi/tgsi_parse.h"
  52 #include "tgsi/tgsi_ureg.h"
  53
  54 #include "st_debug.h"
  55 #include "st_cb_bitmap.h"
  56 #include "st_cb_drawpixels.h"
  57 #include "st_context.h"
  58 #include "st_tgsi_lower_depth_clamp.h"
  59 #include "st_tgsi_lower_yuv.h"
  60 #include "st_program.h"
  61 #include "st_mesa_to_tgsi.h"
  62 #include "st_atifs_to_tgsi.h"
  63 #include "st_nir.h"
  64 #include "st_shader_cache.h"
  65 #include "cso_cache/cso_context.h"
  66
  67
  68
  69 static void
  70 set_affected_state_flags(uint64_t *states,
  71                          struct gl_program *prog,
  72                          uint64_t new_constants,
  73                          uint64_t new_sampler_views,
  74                          uint64_t new_samplers,
  75                          uint64_t new_images,
  76                          uint64_t new_ubos,
  77                          uint64_t new_ssbos,
  78                          uint64_t new_atomics)
  79 {
  80    if (prog->Parameters->NumParameters)
  81       *states |= new_constants;
  82
  83    if (prog->info.num_textures)
  84       *states |= new_sampler_views | new_samplers;
  85
  86    if (prog->info.num_images)
  87       *states |= new_images;
  88
  89    if (prog->info.num_ubos)
  90       *states |= new_ubos;
  91
  92    if (prog->info.num_ssbos)
  93       *states |= new_ssbos;
  94
  95    if (prog->info.num_abos)
  96       *states |= new_atomics;
  97 }
  98
  99 /**
 100  * This determines which states will be updated when the shader is bound.
 101  */
 102 void
 103 st_set_prog_affected_state_flags(struct gl_program *prog)
 104 {
 105    uint64_t *states;
 106
 107    switch (prog->info.stage) {
 108    case MESA_SHADER_VERTEX:
 109       states = &((struct st_vertex_program*)prog)->affected_states;
 110
 111       *states = ST_NEW_VS_STATE |
 112                 ST_NEW_RASTERIZER |
 113                 ST_NEW_VERTEX_ARRAYS;
 114
 115       set_affected_state_flags(states, prog,
 116                                ST_NEW_VS_CONSTANTS,
 117                                ST_NEW_VS_SAMPLER_VIEWS,
 118                                ST_NEW_VS_SAMPLERS,
 119                                ST_NEW_VS_IMAGES,
 120                                ST_NEW_VS_UBOS,
 121                                ST_NEW_VS_SSBOS,
 122                                ST_NEW_VS_ATOMICS);
 123       break;
 124
 125    case MESA_SHADER_TESS_CTRL:
 126       states = &(st_common_program(prog))->affected_states;
 127
 128       *states = ST_NEW_TCS_STATE;
 129
 130       set_affected_state_flags(states, prog,
 131                                ST_NEW_TCS_CONSTANTS,
 132                                ST_NEW_TCS_SAMPLER_VIEWS,
 133                                ST_NEW_TCS_SAMPLERS,
 134                                ST_NEW_TCS_IMAGES,
 135                                ST_NEW_TCS_UBOS,
 136                                ST_NEW_TCS_SSBOS,
 137                                ST_NEW_TCS_ATOMICS);
 138       break;
 139
 140    case MESA_SHADER_TESS_EVAL:
 141       states = &(st_common_program(prog))->affected_states;
 142
 143       *states = ST_NEW_TES_STATE |
 144                 ST_NEW_RASTERIZER;
 145
 146       set_affected_state_flags(states, prog,
 147                                ST_NEW_TES_CONSTANTS,
 148                                ST_NEW_TES_SAMPLER_VIEWS,
 149                                ST_NEW_TES_SAMPLERS,
 150                                ST_NEW_TES_IMAGES,
 151                                ST_NEW_TES_UBOS,
 152                                ST_NEW_TES_SSBOS,
 153                                ST_NEW_TES_ATOMICS);
 154       break;
 155
 156    case MESA_SHADER_GEOMETRY:
 157       states = &(st_common_program(prog))->affected_states;
 158
 159       *states = ST_NEW_GS_STATE |
 160                 ST_NEW_RASTERIZER;
 161
 162       set_affected_state_flags(states, prog,
 163                                ST_NEW_GS_CONSTANTS,
 164                                ST_NEW_GS_SAMPLER_VIEWS,
 165                                ST_NEW_GS_SAMPLERS,
 166                                ST_NEW_GS_IMAGES,
 167                                ST_NEW_GS_UBOS,
 168                                ST_NEW_GS_SSBOS,
 169                                ST_NEW_GS_ATOMICS);
 170       break;
 171
 172    case MESA_SHADER_FRAGMENT:
 173       states = &((struct st_fragment_program*)prog)->affected_states;
 174
 175       /* gl_FragCoord and glDrawPixels always use constants. */
 176       *states = ST_NEW_FS_STATE |
 177                 ST_NEW_SAMPLE_SHADING |
 178                 ST_NEW_FS_CONSTANTS;
 179
 180       set_affected_state_flags(states, prog,
 181                                ST_NEW_FS_CONSTANTS,
 182                                ST_NEW_FS_SAMPLER_VIEWS,
 183                                ST_NEW_FS_SAMPLERS,
 184                                ST_NEW_FS_IMAGES,
 185                                ST_NEW_FS_UBOS,
 186                                ST_NEW_FS_SSBOS,
 187                                ST_NEW_FS_ATOMICS);
 188       break;
 189
 190    case MESA_SHADER_COMPUTE:
 191       states = &((struct st_common_program*)prog)->affected_states;
 192
 193       *states = ST_NEW_CS_STATE;
 194
 195       set_affected_state_flags(states, prog,
 196                                ST_NEW_CS_CONSTANTS,
 197                                ST_NEW_CS_SAMPLER_VIEWS,
 198                                ST_NEW_CS_SAMPLERS,
 199                                ST_NEW_CS_IMAGES,
 200                                ST_NEW_CS_UBOS,
 201                                ST_NEW_CS_SSBOS,
 202                                ST_NEW_CS_ATOMICS);
 203       break;
 204
 205    default:
 206       unreachable("unhandled shader stage");
 207    }
 208 }
 209
 210 static void
 211 delete_ir(struct pipe_shader_state *ir)
 212 {
 213    if (ir->tokens) {
 214       ureg_free_tokens(ir->tokens);
 215       ir->tokens = NULL;
 216    }
 217
 218    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 219     * it has resulted in the driver taking ownership of the NIR.  Those
 220     * callers should be NULLing out the nir field in any pipe_shader_state
 221     * that might have this called in order to indicate that.
 222     *
 223     * GLSL IR and ARB programs will have set gl_program->nir to the same
 224     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 225     */
 226 }
 227
 228 /**
 229  * Delete a vertex program variant.  Note the caller must unlink
 230  * the variant from the linked list.
 231  */
 232 static void
 233 delete_vp_variant(struct st_context *st, struct st_vp_variant *vpv)
 234 {
 235    if (vpv->driver_shader) {
 236       if (st->has_shareable_shaders || vpv->key.st == st) {
 237          cso_delete_vertex_shader(st->cso_context, vpv->driver_shader);
 238       } else {
 239          st_save_zombie_shader(vpv->key.st, PIPE_SHADER_VERTEX,
 240                                vpv->driver_shader);
 241       }
 242    }
 243
 244    if (vpv->draw_shader)
 245       draw_delete_vertex_shader( st->draw, vpv->draw_shader );
 246
 247    delete_ir(&vpv->tgsi);
 248
 249    free( vpv );
 250 }
 251
 252
 253
 254 /**
 255  * Clean out any old compilations:
 256  */
 257 void
 258 st_release_vp_variants( struct st_context *st,
 259                         struct st_vertex_program *stvp )
 260 {
 261    struct st_vp_variant *vpv;
 262
 263    for (vpv = stvp->variants; vpv; ) {
 264       struct st_vp_variant *next = vpv->next;
 265       delete_vp_variant(st, vpv);
 266       vpv = next;
 267    }
 268
 269    stvp->variants = NULL;
 270
 271    delete_ir(&stvp->tgsi);
 272 }
 273
 274
 275
 276 /**
 277  * Delete a fragment program variant.  Note the caller must unlink
 278  * the variant from the linked list.
 279  */
 280 static void
 281 delete_fp_variant(struct st_context *st, struct st_fp_variant *fpv)
 282 {
 283    if (fpv->driver_shader) {
 284       if (st->has_shareable_shaders || fpv->key.st == st) {
 285          cso_delete_fragment_shader(st->cso_context, fpv->driver_shader);
 286       } else {
 287          st_save_zombie_shader(fpv->key.st, PIPE_SHADER_FRAGMENT,
 288                                fpv->driver_shader);
 289       }
 290    }
 291
 292    free(fpv);
 293 }
 294
 295
 296 /**
 297  * Free all variants of a fragment program.
 298  */
 299 void
 300 st_release_fp_variants(struct st_context *st, struct st_fragment_program *stfp)
 301 {
 302    struct st_fp_variant *fpv;
 303
 304    for (fpv = stfp->variants; fpv; ) {
 305       struct st_fp_variant *next = fpv->next;
 306       delete_fp_variant(st, fpv);
 307       fpv = next;
 308    }
 309
 310    stfp->variants = NULL;
 311
 312    delete_ir(&stfp->tgsi);
 313 }
 314
 315
 316 /**
 317  * Delete a basic program variant.  Note the caller must unlink
 318  * the variant from the linked list.
 319  */
 320 static void
 321 delete_basic_variant(struct st_context *st, struct st_basic_variant *v,
 322                      GLenum target)
 323 {
 324    if (v->driver_shader) {
 325       if (st->has_shareable_shaders || v->key.st == st) {
 326          /* The shader's context matches the calling context, or we
 327           * don't care.
 328           */
 329          switch (target) {
 330          case GL_TESS_CONTROL_PROGRAM_NV:
 331             cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
 332             break;
 333          case GL_TESS_EVALUATION_PROGRAM_NV:
 334             cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
 335             break;
 336          case GL_GEOMETRY_PROGRAM_NV:
 337             cso_delete_geometry_shader(st->cso_context, v->driver_shader);
 338             break;
 339          case GL_COMPUTE_PROGRAM_NV:
 340             cso_delete_compute_shader(st->cso_context, v->driver_shader);
 341             break;
 342          default:
 343             unreachable("bad shader type in delete_basic_variant");
 344          }
 345       } else {
 346          /* We can't delete a shader with a context different from the one
 347           * that created it.  Add it to the creating context's zombie list.
 348           */
 349          enum pipe_shader_type type;
 350          switch (target) {
 351          case GL_TESS_CONTROL_PROGRAM_NV:
 352             type = PIPE_SHADER_TESS_CTRL;
 353             break;
 354          case GL_TESS_EVALUATION_PROGRAM_NV:
 355             type = PIPE_SHADER_TESS_EVAL;
 356             break;
 357          case GL_GEOMETRY_PROGRAM_NV:
 358             type = PIPE_SHADER_GEOMETRY;
 359             break;
 360          default:
 361             unreachable("");
 362          }
 363          st_save_zombie_shader(v->key.st, type, v->driver_shader);
 364       }
 365    }
 366
 367    free(v);
 368 }
 369
 370
 371 /**
 372  * Free all basic program variants.
 373  */
 374 void
 375 st_release_basic_variants(struct st_context *st, struct st_common_program *p)
 376 {
 377    struct st_basic_variant *v;
 378
 379    for (v = p->variants; v; ) {
 380       struct st_basic_variant *next = v->next;
 381       delete_basic_variant(st, v, p->Base.Target);
 382       v = next;
 383    }
 384
 385    p->variants = NULL;
 386    delete_ir(&p->tgsi);
 387 }
 388
 389
 390 /**
 391  * Translate ARB (asm) program to NIR
 392  */
 393 static nir_shader *
 394 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 395                          gl_shader_stage stage)
 396 {
 397    const struct gl_shader_compiler_options *options =
 398       &st->ctx->Const.ShaderCompilerOptions[stage];
 399
 400    /* Translate to NIR */
 401    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 402    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 403    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 404
 405    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, st->pipe->screen);
 406    NIR_PASS_V(nir, nir_lower_system_values);
 407
 408    /* Optimise NIR */
 409    NIR_PASS_V(nir, nir_opt_constant_folding);
 410    st_nir_opts(nir);
 411    nir_validate_shader(nir, "after st/ptn NIR opts");
 412
 413    return nir;
 414 }
 415
 416 void
 417 st_prepare_vertex_program(struct st_vertex_program *stvp)
 418 {
 419    stvp->num_inputs = 0;
 420    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 421    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 422
 423    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 424     * and TGSI generic input indexes, plus input attrib semantic info.
 425     */
 426    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 427       if ((stvp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 428          stvp->input_to_index[attr] = stvp->num_inputs;
 429          stvp->index_to_input[stvp->num_inputs] = attr;
 430          stvp->num_inputs++;
 431
 432          if ((stvp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 433             /* add placeholder for second part of a double attribute */
 434             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 435             stvp->num_inputs++;
 436          }
 437       }
 438    }
 439    /* pre-setup potentially unused edgeflag input */
 440    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 441    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 442
 443    /* Compute mapping of vertex program outputs to slots. */
 444    unsigned num_outputs = 0;
 445    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 446       if (stvp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 447          stvp->result_to_output[attr] = num_outputs++;
 448    }
 449    /* pre-setup potentially unused edgeflag output */
 450    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 451 }
 452
 453 void
 454 st_translate_stream_output_info(struct gl_program *prog)
 455 {
 456    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 457    if (!info)
 458       return;
 459
 460    /* Determine the (default) output register mapping for each output. */
 461    unsigned num_outputs = 0;
 462    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 463    memset(output_mapping, 0, sizeof(output_mapping));
 464
 465    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 466       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 467          output_mapping[attr] = num_outputs++;
 468    }
 469
 470    /* Translate stream output info. */
 471    struct pipe_stream_output_info *so_info = NULL;
 472    if (prog->info.stage == MESA_SHADER_VERTEX)
 473       so_info = &((struct st_vertex_program*)prog)->tgsi.stream_output;
 474    else
 475       so_info = &((struct st_common_program*)prog)->tgsi.stream_output;
 476
 477    for (unsigned i = 0; i < info->NumOutputs; i++) {
 478       so_info->output[i].register_index =
 479          output_mapping[info->Outputs[i].OutputRegister];
 480       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 481       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 482       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 483       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 484       so_info->output[i].stream = info->Outputs[i].StreamId;
 485    }
 486
 487    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 488       so_info->stride[i] = info->Buffers[i].Stride;
 489    }
 490    so_info->num_outputs = info->NumOutputs;
 491 }
 492
 493 /**
 494  * Translate a vertex program.
 495  */
 496 bool
 497 st_translate_vertex_program(struct st_context *st,
 498                             struct st_vertex_program *stvp)
 499 {
 500    struct ureg_program *ureg;
 501    enum pipe_error error;
 502    unsigned num_outputs = 0;
 503    unsigned attr;
 504    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 505    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 506
 507    if (stvp->Base.arb.IsPositionInvariant)
 508       _mesa_insert_mvp_code(st->ctx, &stvp->Base);
 509
 510    st_prepare_vertex_program(stvp);
 511
 512    /* ARB_vp: */
 513    if (!stvp->glsl_to_tgsi) {
 514       _mesa_remove_output_reads(&stvp->Base, PROGRAM_OUTPUT);
 515
 516       /* This determines which states will be updated when the assembly
 517        * shader is bound.
 518        */
 519       stvp->affected_states = ST_NEW_VS_STATE |
 520                               ST_NEW_RASTERIZER |
 521                               ST_NEW_VERTEX_ARRAYS;
 522
 523       if (stvp->Base.Parameters->NumParameters)
 524          stvp->affected_states |= ST_NEW_VS_CONSTANTS;
 525
 526       /* No samplers are allowed in ARB_vp. */
 527    }
 528
 529    /* Get semantic names and indices. */
 530    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 531       if (stvp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 532          unsigned slot = num_outputs++;
 533          unsigned semantic_name, semantic_index;
 534          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 535                                       &semantic_name, &semantic_index);
 536          output_semantic_name[slot] = semantic_name;
 537          output_semantic_index[slot] = semantic_index;
 538       }
 539    }
 540    /* pre-setup potentially unused edgeflag output */
 541    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 542    output_semantic_index[num_outputs] = 0;
 543
 544    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 545    if (ureg == NULL)
 546       return false;
 547
 548    if (stvp->Base.info.clip_distance_array_size)
 549       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 550                     stvp->Base.info.clip_distance_array_size);
 551    if (stvp->Base.info.cull_distance_array_size)
 552       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 553                     stvp->Base.info.cull_distance_array_size);
 554
 555    if (ST_DEBUG & DEBUG_MESA) {
 556       _mesa_print_program(&stvp->Base);
 557       _mesa_print_program_parameters(st->ctx, &stvp->Base);
 558       debug_printf("\n");
 559    }
 560
 561    if (stvp->glsl_to_tgsi) {
 562       error = st_translate_program(st->ctx,
 563                                    PIPE_SHADER_VERTEX,
 564                                    ureg,
 565                                    stvp->glsl_to_tgsi,
 566                                    &stvp->Base,
 567                                    /* inputs */
 568                                    stvp->num_inputs,
 569                                    stvp->input_to_index,
 570                                    NULL, /* inputSlotToAttr */
 571                                    NULL, /* input semantic name */
 572                                    NULL, /* input semantic index */
 573                                    NULL, /* interp mode */
 574                                    /* outputs */
 575                                    num_outputs,
 576                                    stvp->result_to_output,
 577                                    output_semantic_name,
 578                                    output_semantic_index);
 579
 580       st_translate_stream_output_info(&stvp->Base);
 581
 582       free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
 583    } else
 584       error = st_translate_mesa_program(st->ctx,
 585                                         PIPE_SHADER_VERTEX,
 586                                         ureg,
 587                                         &stvp->Base,
 588                                         /* inputs */
 589                                         stvp->num_inputs,
 590                                         stvp->input_to_index,
 591                                         NULL, /* input semantic name */
 592                                         NULL, /* input semantic index */
 593                                         NULL,
 594                                         /* outputs */
 595                                         num_outputs,
 596                                         stvp->result_to_output,
 597                                         output_semantic_name,
 598                                         output_semantic_index);
 599
 600    if (error) {
 601       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 602       _mesa_print_program(&stvp->Base);
 603       debug_assert(0);
 604       return false;
 605    }
 606
 607    stvp->tgsi.tokens = ureg_get_tokens(ureg, &stvp->num_tgsi_tokens);
 608    ureg_destroy(ureg);
 609
 610    if (stvp->glsl_to_tgsi) {
 611       stvp->glsl_to_tgsi = NULL;
 612       st_store_ir_in_disk_cache(st, &stvp->Base, false);
 613    }
 614
 615    /* Translate to NIR.
 616     *
 617     * This must be done after the translation to TGSI is done, because
 618     * we'll pass the NIR shader to the driver and the TGSI version to
 619     * the draw module for the select/feedback/rasterpos code.
 620     */
 621    if (st->pipe->screen->get_shader_param(st->pipe->screen,
 622                                           PIPE_SHADER_VERTEX,
 623                                           PIPE_SHADER_CAP_PREFERRED_IR)) {
 624       assert(!stvp->glsl_to_tgsi);
 625
 626       nir_shader *nir =
 627          st_translate_prog_to_nir(st, &stvp->Base, MESA_SHADER_VERTEX);
 628
 629       if (stvp->tgsi.ir.nir)
 630          ralloc_free(stvp->tgsi.ir.nir);
 631       stvp->tgsi.type = PIPE_SHADER_IR_NIR;
 632       stvp->tgsi.ir.nir = nir;
 633       stvp->Base.nir = nir;
 634       return true;
 635    }
 636
 637    return stvp->tgsi.tokens != NULL;
 638 }
 639
 640 static const gl_state_index16 depth_range_state[STATE_LENGTH] =
 641    { STATE_DEPTH_RANGE };
 642
 643 static struct st_vp_variant *
 644 st_create_vp_variant(struct st_context *st,
 645                      struct st_vertex_program *stvp,
 646                      const struct st_vp_variant_key *key)
 647 {
 648    struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
 649    struct pipe_context *pipe = st->pipe;
 650
 651    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 652       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 653    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 654
 655    vpv->key = *key;
 656    vpv->tgsi.stream_output = stvp->tgsi.stream_output;
 657    vpv->num_inputs = stvp->num_inputs;
 658
 659    /* When generating a NIR program, we usually don't have TGSI tokens.
 660     * However, we do create them for ARB_vertex_program / fixed-function VS
 661     * programs which we may need to use with the draw module for legacy
 662     * feedback/select emulation.  If they exist, copy them.
 663     */
 664    if (stvp->tgsi.tokens)
 665       vpv->tgsi.tokens = tgsi_dup_tokens(stvp->tgsi.tokens);
 666
 667    if (stvp->tgsi.type == PIPE_SHADER_IR_NIR) {
 668       vpv->tgsi.type = PIPE_SHADER_IR_NIR;
 669       vpv->tgsi.ir.nir = nir_shader_clone(NULL, stvp->tgsi.ir.nir);
 670       if (key->clamp_color)
 671          NIR_PASS_V(vpv->tgsi.ir.nir, nir_lower_clamp_color_outputs);
 672       if (key->passthrough_edgeflags) {
 673          NIR_PASS_V(vpv->tgsi.ir.nir, nir_lower_passthrough_edgeflags);
 674          vpv->num_inputs++;
 675       }
 676
 677       if (key->lower_point_size) {
 678          _mesa_add_state_reference(params, point_size_state);
 679          NIR_PASS_V(vpv->tgsi.ir.nir, nir_lower_point_size_mov,
 680                     point_size_state);
 681       }
 682
 683       if (key->lower_ucp) {
 684          struct pipe_screen *screen = pipe->screen;
 685          bool can_compact = screen->get_param(screen,
 686                                               PIPE_CAP_NIR_COMPACT_ARRAYS);
 687
 688          bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 689          gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 690          for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 691             if (use_eye) {
 692                clipplane_state[i][0] = STATE_CLIPPLANE;
 693                clipplane_state[i][1] = i;
 694             } else {
 695                clipplane_state[i][0] = STATE_INTERNAL;
 696                clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 697                clipplane_state[i][2] = i;
 698             }
 699             _mesa_add_state_reference(params, clipplane_state[i]);
 700          }
 701
 702          NIR_PASS_V(vpv->tgsi.ir.nir, nir_lower_clip_vs, key->lower_ucp,
 703                     true, can_compact, clipplane_state);
 704          NIR_PASS_V(vpv->tgsi.ir.nir, nir_lower_io_to_temporaries,
 705                     nir_shader_get_entrypoint(vpv->tgsi.ir.nir), true, false);
 706       }
 707
 708       st_finalize_nir(st, &stvp->Base, stvp->shader_program,
 709                       vpv->tgsi.ir.nir);
 710
 711       vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi);
 712       /* driver takes ownership of IR: */
 713       vpv->tgsi.ir.nir = NULL;
 714       return vpv;
 715    }
 716
 717    /* Emulate features. */
 718    if (key->clamp_color || key->passthrough_edgeflags) {
 719       const struct tgsi_token *tokens;
 720       unsigned flags =
 721          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 722          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 723
 724       tokens = tgsi_emulate(vpv->tgsi.tokens, flags);
 725
 726       if (tokens) {
 727          tgsi_free_tokens(vpv->tgsi.tokens);
 728          vpv->tgsi.tokens = tokens;
 729
 730          if (key->passthrough_edgeflags)
 731             vpv->num_inputs++;
 732       } else
 733          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 734    }
 735
 736    if (key->lower_depth_clamp) {
 737       unsigned depth_range_const =
 738             _mesa_add_state_reference(params, depth_range_state);
 739
 740       const struct tgsi_token *tokens;
 741       tokens = st_tgsi_lower_depth_clamp(vpv->tgsi.tokens, depth_range_const,
 742                                          key->clip_negative_one_to_one);
 743       if (tokens != vpv->tgsi.tokens)
 744          tgsi_free_tokens(vpv->tgsi.tokens);
 745       vpv->tgsi.tokens = tokens;
 746    }
 747
 748    if (ST_DEBUG & DEBUG_TGSI) {
 749       tgsi_dump(vpv->tgsi.tokens, 0);
 750       debug_printf("\n");
 751    }
 752
 753    vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi);
 754    return vpv;
 755 }
 756
 757
 758 /**
 759  * Find/create a vertex program variant.
 760  */
 761 struct st_vp_variant *
 762 st_get_vp_variant(struct st_context *st,
 763                   struct st_vertex_program *stvp,
 764                   const struct st_vp_variant_key *key)
 765 {
 766    struct st_vp_variant *vpv;
 767
 768    /* Search for existing variant */
 769    for (vpv = stvp->variants; vpv; vpv = vpv->next) {
 770       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 771          break;
 772       }
 773    }
 774
 775    if (!vpv) {
 776       /* create now */
 777       vpv = st_create_vp_variant(st, stvp, key);
 778       if (vpv) {
 779           for (unsigned index = 0; index < vpv->num_inputs; ++index) {
 780              unsigned attr = stvp->index_to_input[index];
 781              if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 782                 continue;
 783              vpv->vert_attrib_mask |= 1u << attr;
 784           }
 785
 786          /* insert into list */
 787          vpv->next = stvp->variants;
 788          stvp->variants = vpv;
 789       }
 790    }
 791
 792    return vpv;
 793 }
 794
 795
 796 /**
 797  * Translate a Mesa fragment shader into a TGSI shader.
 798  */
 799 bool
 800 st_translate_fragment_program(struct st_context *st,
 801                               struct st_fragment_program *stfp)
 802 {
 803    /* Non-GLSL programs: */
 804    if (!stfp->glsl_to_tgsi) {
 805       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 806       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 807          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 808
 809       /* This determines which states will be updated when the assembly
 810        * shader is bound.
 811        *
 812        * fragment.position and glDrawPixels always use constants.
 813        */
 814       stfp->affected_states = ST_NEW_FS_STATE |
 815                               ST_NEW_SAMPLE_SHADING |
 816                               ST_NEW_FS_CONSTANTS;
 817
 818       if (stfp->ati_fs) {
 819          /* Just set them for ATI_fs unconditionally. */
 820          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 821                                   ST_NEW_FS_SAMPLERS;
 822       } else {
 823          /* ARB_fp */
 824          if (stfp->Base.SamplersUsed)
 825             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 826                                      ST_NEW_FS_SAMPLERS;
 827       }
 828
 829       /* Translate to NIR. */
 830       if (!stfp->ati_fs &&
 831           st->pipe->screen->get_shader_param(st->pipe->screen,
 832                                              PIPE_SHADER_FRAGMENT,
 833                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 834          nir_shader *nir =
 835             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 836
 837          if (stfp->tgsi.ir.nir)
 838             ralloc_free(stfp->tgsi.ir.nir);
 839          stfp->tgsi.type = PIPE_SHADER_IR_NIR;
 840          stfp->tgsi.ir.nir = nir;
 841          stfp->Base.nir = nir;
 842          return true;
 843       }
 844    }
 845
 846    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 847    ubyte inputMapping[VARYING_SLOT_MAX];
 848    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 849    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 850    GLuint attr;
 851    GLbitfield64 inputsRead;
 852    struct ureg_program *ureg;
 853
 854    GLboolean write_all = GL_FALSE;
 855
 856    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 857    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 858    uint fs_num_inputs = 0;
 859
 860    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 861    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 862    uint fs_num_outputs = 0;
 863
 864    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 865
 866    /*
 867     * Convert Mesa program inputs to TGSI input register semantics.
 868     */
 869    inputsRead = stfp->Base.info.inputs_read;
 870    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 871       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 872          const GLuint slot = fs_num_inputs++;
 873
 874          inputMapping[attr] = slot;
 875          inputSlotToAttr[slot] = attr;
 876
 877          switch (attr) {
 878          case VARYING_SLOT_POS:
 879             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 880             input_semantic_index[slot] = 0;
 881             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 882             break;
 883          case VARYING_SLOT_COL0:
 884             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 885             input_semantic_index[slot] = 0;
 886             interpMode[slot] = stfp->glsl_to_tgsi ?
 887                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 888             break;
 889          case VARYING_SLOT_COL1:
 890             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 891             input_semantic_index[slot] = 1;
 892             interpMode[slot] = stfp->glsl_to_tgsi ?
 893                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 894             break;
 895          case VARYING_SLOT_FOGC:
 896             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 897             input_semantic_index[slot] = 0;
 898             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 899             break;
 900          case VARYING_SLOT_FACE:
 901             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 902             input_semantic_index[slot] = 0;
 903             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 904             break;
 905          case VARYING_SLOT_PRIMITIVE_ID:
 906             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 907             input_semantic_index[slot] = 0;
 908             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 909             break;
 910          case VARYING_SLOT_LAYER:
 911             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 912             input_semantic_index[slot] = 0;
 913             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 914             break;
 915          case VARYING_SLOT_VIEWPORT:
 916             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 917             input_semantic_index[slot] = 0;
 918             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 919             break;
 920          case VARYING_SLOT_CLIP_DIST0:
 921             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 922             input_semantic_index[slot] = 0;
 923             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 924             break;
 925          case VARYING_SLOT_CLIP_DIST1:
 926             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 927             input_semantic_index[slot] = 1;
 928             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 929             break;
 930          case VARYING_SLOT_CULL_DIST0:
 931          case VARYING_SLOT_CULL_DIST1:
 932             /* these should have been lowered by GLSL */
 933             assert(0);
 934             break;
 935             /* In most cases, there is nothing special about these
 936              * inputs, so adopt a convention to use the generic
 937              * semantic name and the mesa VARYING_SLOT_ number as the
 938              * index.
 939              *
 940              * All that is required is that the vertex shader labels
 941              * its own outputs similarly, and that the vertex shader
 942              * generates at least every output required by the
 943              * fragment shader plus fixed-function hardware (such as
 944              * BFC).
 945              *
 946              * However, some drivers may need us to identify the PNTC and TEXi
 947              * varyings if, for example, their capability to replace them with
 948              * sprite coordinates is limited.
 949              */
 950          case VARYING_SLOT_PNTC:
 951             if (st->needs_texcoord_semantic) {
 952                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
 953                input_semantic_index[slot] = 0;
 954                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 955                break;
 956             }
 957             /* fall through */
 958          case VARYING_SLOT_TEX0:
 959          case VARYING_SLOT_TEX1:
 960          case VARYING_SLOT_TEX2:
 961          case VARYING_SLOT_TEX3:
 962          case VARYING_SLOT_TEX4:
 963          case VARYING_SLOT_TEX5:
 964          case VARYING_SLOT_TEX6:
 965          case VARYING_SLOT_TEX7:
 966             if (st->needs_texcoord_semantic) {
 967                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
 968                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
 969                interpMode[slot] = stfp->glsl_to_tgsi ?
 970                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 971                break;
 972             }
 973             /* fall through */
 974          case VARYING_SLOT_VAR0:
 975          default:
 976             /* Semantic indices should be zero-based because drivers may choose
 977              * to assign a fixed slot determined by that index.
 978              * This is useful because ARB_separate_shader_objects uses location
 979              * qualifiers for linkage, and if the semantic index corresponds to
 980              * these locations, linkage passes in the driver become unecessary.
 981              *
 982              * If needs_texcoord_semantic is true, no semantic indices will be
 983              * consumed for the TEXi varyings, and we can base the locations of
 984              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
 985              */
 986             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
 987                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
 988             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
 989             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
 990             if (attr == VARYING_SLOT_PNTC)
 991                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 992             else {
 993                interpMode[slot] = stfp->glsl_to_tgsi ?
 994                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 995             }
 996             break;
 997          }
 998       }
 999       else {
1000          inputMapping[attr] = -1;
1001       }
1002    }
1003
1004    /*
1005     * Semantics and mapping for outputs
1006     */
1007    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
1008
1009    /* if z is written, emit that first */
1010    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
1011       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
1012       fs_output_semantic_index[fs_num_outputs] = 0;
1013       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
1014       fs_num_outputs++;
1015       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
1016    }
1017
1018    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
1019       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
1020       fs_output_semantic_index[fs_num_outputs] = 0;
1021       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
1022       fs_num_outputs++;
1023       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
1024    }
1025
1026    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
1027       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
1028       fs_output_semantic_index[fs_num_outputs] = 0;
1029       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
1030       fs_num_outputs++;
1031       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
1032    }
1033
1034    /* handle remaining outputs (color) */
1035    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
1036       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
1037          stfp->Base.SecondaryOutputsWritten;
1038       const unsigned loc = attr % FRAG_RESULT_MAX;
1039
1040       if (written & BITFIELD64_BIT(loc)) {
1041          switch (loc) {
1042          case FRAG_RESULT_DEPTH:
1043          case FRAG_RESULT_STENCIL:
1044          case FRAG_RESULT_SAMPLE_MASK:
1045             /* handled above */
1046             assert(0);
1047             break;
1048          case FRAG_RESULT_COLOR:
1049             write_all = GL_TRUE; /* fallthrough */
1050          default: {
1051             int index;
1052             assert(loc == FRAG_RESULT_COLOR ||
1053                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1054
1055             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1056
1057             if (attr >= FRAG_RESULT_MAX) {
1058                /* Secondary color for dual source blending. */
1059                assert(index == 0);
1060                index++;
1061             }
1062
1063             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1064             fs_output_semantic_index[fs_num_outputs] = index;
1065             outputMapping[attr] = fs_num_outputs;
1066             break;
1067          }
1068          }
1069
1070          fs_num_outputs++;
1071       }
1072    }
1073
1074    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1075    if (ureg == NULL)
1076       return false;
1077
1078    if (ST_DEBUG & DEBUG_MESA) {
1079       _mesa_print_program(&stfp->Base);
1080       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1081       debug_printf("\n");
1082    }
1083    if (write_all == GL_TRUE)
1084       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1085
1086    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1087       switch (stfp->Base.info.fs.depth_layout) {
1088       case FRAG_DEPTH_LAYOUT_ANY:
1089          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1090                        TGSI_FS_DEPTH_LAYOUT_ANY);
1091          break;
1092       case FRAG_DEPTH_LAYOUT_GREATER:
1093          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1094                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1095          break;
1096       case FRAG_DEPTH_LAYOUT_LESS:
1097          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1098                        TGSI_FS_DEPTH_LAYOUT_LESS);
1099          break;
1100       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1101          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1102                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1103          break;
1104       default:
1105          assert(0);
1106       }
1107    }
1108
1109    if (stfp->glsl_to_tgsi) {
1110       st_translate_program(st->ctx,
1111                            PIPE_SHADER_FRAGMENT,
1112                            ureg,
1113                            stfp->glsl_to_tgsi,
1114                            &stfp->Base,
1115                            /* inputs */
1116                            fs_num_inputs,
1117                            inputMapping,
1118                            inputSlotToAttr,
1119                            input_semantic_name,
1120                            input_semantic_index,
1121                            interpMode,
1122                            /* outputs */
1123                            fs_num_outputs,
1124                            outputMapping,
1125                            fs_output_semantic_name,
1126                            fs_output_semantic_index);
1127
1128       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1129    } else if (stfp->ati_fs)
1130       st_translate_atifs_program(ureg,
1131                                  stfp->ati_fs,
1132                                  &stfp->Base,
1133                                  /* inputs */
1134                                  fs_num_inputs,
1135                                  inputMapping,
1136                                  input_semantic_name,
1137                                  input_semantic_index,
1138                                  interpMode,
1139                                  /* outputs */
1140                                  fs_num_outputs,
1141                                  outputMapping,
1142                                  fs_output_semantic_name,
1143                                  fs_output_semantic_index);
1144    else
1145       st_translate_mesa_program(st->ctx,
1146                                 PIPE_SHADER_FRAGMENT,
1147                                 ureg,
1148                                 &stfp->Base,
1149                                 /* inputs */
1150                                 fs_num_inputs,
1151                                 inputMapping,
1152                                 input_semantic_name,
1153                                 input_semantic_index,
1154                                 interpMode,
1155                                 /* outputs */
1156                                 fs_num_outputs,
1157                                 outputMapping,
1158                                 fs_output_semantic_name,
1159                                 fs_output_semantic_index);
1160
1161    stfp->tgsi.tokens = ureg_get_tokens(ureg, &stfp->num_tgsi_tokens);
1162    ureg_destroy(ureg);
1163
1164    if (stfp->glsl_to_tgsi) {
1165       stfp->glsl_to_tgsi = NULL;
1166       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1167    }
1168
1169    return stfp->tgsi.tokens != NULL;
1170 }
1171
1172 static struct st_fp_variant *
1173 st_create_fp_variant(struct st_context *st,
1174                      struct st_fragment_program *stfp,
1175                      const struct st_fp_variant_key *key)
1176 {
1177    struct pipe_context *pipe = st->pipe;
1178    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1179    struct pipe_shader_state tgsi = {0};
1180    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1181    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1182       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1183    static const gl_state_index16 scale_state[STATE_LENGTH] =
1184       { STATE_INTERNAL, STATE_PT_SCALE };
1185    static const gl_state_index16 bias_state[STATE_LENGTH] =
1186       { STATE_INTERNAL, STATE_PT_BIAS };
1187    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1188       { STATE_INTERNAL, STATE_ALPHA_REF };
1189
1190    if (!variant)
1191       return NULL;
1192
1193    if (stfp->tgsi.type == PIPE_SHADER_IR_NIR) {
1194       tgsi.type = PIPE_SHADER_IR_NIR;
1195       tgsi.ir.nir = nir_shader_clone(NULL, stfp->tgsi.ir.nir);
1196
1197       if (key->clamp_color)
1198          NIR_PASS_V(tgsi.ir.nir, nir_lower_clamp_color_outputs);
1199
1200       if (key->lower_flatshade)
1201          NIR_PASS_V(tgsi.ir.nir, nir_lower_flatshade);
1202
1203       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1204          _mesa_add_state_reference(params, alpha_ref_state);
1205          NIR_PASS_V(tgsi.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1206                     false, alpha_ref_state);
1207       }
1208
1209       if (key->lower_two_sided_color)
1210          NIR_PASS_V(tgsi.ir.nir, nir_lower_two_sided_color);
1211
1212       if (key->persample_shading) {
1213           nir_shader *shader = tgsi.ir.nir;
1214           nir_foreach_variable(var, &shader->inputs)
1215              var->data.sample = true;
1216       }
1217
1218       assert(!(key->bitmap && key->drawpixels));
1219
1220       /* glBitmap */
1221       if (key->bitmap) {
1222          nir_lower_bitmap_options options = {0};
1223
1224          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1225          options.sampler = variant->bitmap_sampler;
1226          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1227
1228          NIR_PASS_V(tgsi.ir.nir, nir_lower_bitmap, &options);
1229       }
1230
1231       /* glDrawPixels (color only) */
1232       if (key->drawpixels) {
1233          nir_lower_drawpixels_options options = {{0}};
1234          unsigned samplers_used = stfp->Base.SamplersUsed;
1235
1236          /* Find the first unused slot. */
1237          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1238          options.drawpix_sampler = variant->drawpix_sampler;
1239          samplers_used |= (1 << variant->drawpix_sampler);
1240
1241          options.pixel_maps = key->pixelMaps;
1242          if (key->pixelMaps) {
1243             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1244             options.pixelmap_sampler = variant->pixelmap_sampler;
1245          }
1246
1247          options.scale_and_bias = key->scaleAndBias;
1248          if (key->scaleAndBias) {
1249             _mesa_add_state_reference(params, scale_state);
1250             memcpy(options.scale_state_tokens, scale_state,
1251                    sizeof(options.scale_state_tokens));
1252             _mesa_add_state_reference(params, bias_state);
1253             memcpy(options.bias_state_tokens, bias_state,
1254                    sizeof(options.bias_state_tokens));
1255          }
1256
1257          _mesa_add_state_reference(params, texcoord_state);
1258          memcpy(options.texcoord_state_tokens, texcoord_state,
1259                 sizeof(options.texcoord_state_tokens));
1260
1261          NIR_PASS_V(tgsi.ir.nir, nir_lower_drawpixels, &options);
1262       }
1263
1264       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1265                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1266                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1267          nir_lower_tex_options options = {0};
1268          options.lower_y_uv_external = key->external.lower_nv12;
1269          options.lower_y_u_v_external = key->external.lower_iyuv;
1270          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1271          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1272          options.lower_ayuv_external = key->external.lower_ayuv;
1273          options.lower_xyuv_external = key->external.lower_xyuv;
1274          NIR_PASS_V(tgsi.ir.nir, nir_lower_tex, &options);
1275       }
1276
1277       st_finalize_nir(st, &stfp->Base, stfp->shader_program, tgsi.ir.nir);
1278
1279       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1280                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1281          /* This pass needs to happen *after* nir_lower_sampler */
1282          NIR_PASS_V(tgsi.ir.nir, st_nir_lower_tex_src_plane,
1283                     ~stfp->Base.SamplersUsed,
1284                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1285                        key->external.lower_yx_xuxv,
1286                     key->external.lower_iyuv);
1287       }
1288
1289       /* Some of the lowering above may have introduced new varyings */
1290       nir_shader_gather_info(tgsi.ir.nir,
1291                              nir_shader_get_entrypoint(tgsi.ir.nir));
1292
1293       variant->driver_shader = pipe->create_fs_state(pipe, &tgsi);
1294       variant->key = *key;
1295
1296       return variant;
1297    }
1298
1299    tgsi.tokens = stfp->tgsi.tokens;
1300
1301    assert(!(key->bitmap && key->drawpixels));
1302
1303    /* Fix texture targets and add fog for ATI_fs */
1304    if (stfp->ati_fs) {
1305       const struct tgsi_token *tokens = st_fixup_atifs(tgsi.tokens, key);
1306
1307       if (tokens)
1308          tgsi.tokens = tokens;
1309       else
1310          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1311    }
1312
1313    /* Emulate features. */
1314    if (key->clamp_color || key->persample_shading) {
1315       const struct tgsi_token *tokens;
1316       unsigned flags =
1317          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1318          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1319
1320       tokens = tgsi_emulate(tgsi.tokens, flags);
1321
1322       if (tokens) {
1323          if (tgsi.tokens != stfp->tgsi.tokens)
1324             tgsi_free_tokens(tgsi.tokens);
1325          tgsi.tokens = tokens;
1326       } else
1327          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1328    }
1329
1330    /* glBitmap */
1331    if (key->bitmap) {
1332       const struct tgsi_token *tokens;
1333
1334       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1335
1336       tokens = st_get_bitmap_shader(tgsi.tokens,
1337                                     st->internal_target,
1338                                     variant->bitmap_sampler,
1339                                     st->needs_texcoord_semantic,
1340                                     st->bitmap.tex_format ==
1341                                     PIPE_FORMAT_R8_UNORM);
1342
1343       if (tokens) {
1344          if (tgsi.tokens != stfp->tgsi.tokens)
1345             tgsi_free_tokens(tgsi.tokens);
1346          tgsi.tokens = tokens;
1347       } else
1348          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1349    }
1350
1351    /* glDrawPixels (color only) */
1352    if (key->drawpixels) {
1353       const struct tgsi_token *tokens;
1354       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1355
1356       /* Find the first unused slot. */
1357       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1358
1359       if (key->pixelMaps) {
1360          unsigned samplers_used = stfp->Base.SamplersUsed |
1361                                   (1 << variant->drawpix_sampler);
1362
1363          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1364       }
1365
1366       if (key->scaleAndBias) {
1367          scale_const = _mesa_add_state_reference(params, scale_state);
1368          bias_const = _mesa_add_state_reference(params, bias_state);
1369       }
1370
1371       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1372
1373       tokens = st_get_drawpix_shader(tgsi.tokens,
1374                                      st->needs_texcoord_semantic,
1375                                      key->scaleAndBias, scale_const,
1376                                      bias_const, key->pixelMaps,
1377                                      variant->drawpix_sampler,
1378                                      variant->pixelmap_sampler,
1379                                      texcoord_const, st->internal_target);
1380
1381       if (tokens) {
1382          if (tgsi.tokens != stfp->tgsi.tokens)
1383             tgsi_free_tokens(tgsi.tokens);
1384          tgsi.tokens = tokens;
1385       } else
1386          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1387    }
1388
1389    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1390                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1391       const struct tgsi_token *tokens;
1392
1393       /* samplers inserted would conflict, but this should be unpossible: */
1394       assert(!(key->bitmap || key->drawpixels));
1395
1396       tokens = st_tgsi_lower_yuv(tgsi.tokens,
1397                                  ~stfp->Base.SamplersUsed,
1398                                  key->external.lower_nv12 ||
1399                                     key->external.lower_xy_uxvx ||
1400                                     key->external.lower_yx_xuxv,
1401                                  key->external.lower_iyuv);
1402       if (tokens) {
1403          if (tgsi.tokens != stfp->tgsi.tokens)
1404             tgsi_free_tokens(tgsi.tokens);
1405          tgsi.tokens = tokens;
1406       } else {
1407          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1408       }
1409    }
1410
1411    if (key->lower_depth_clamp) {
1412       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1413
1414       const struct tgsi_token *tokens;
1415       tokens = st_tgsi_lower_depth_clamp_fs(tgsi.tokens, depth_range_const);
1416       if (tgsi.tokens != stfp->tgsi.tokens)
1417          tgsi_free_tokens(tgsi.tokens);
1418       tgsi.tokens = tokens;
1419    }
1420
1421    if (ST_DEBUG & DEBUG_TGSI) {
1422       tgsi_dump(tgsi.tokens, 0);
1423       debug_printf("\n");
1424    }
1425
1426    /* fill in variant */
1427    variant->driver_shader = pipe->create_fs_state(pipe, &tgsi);
1428    variant->key = *key;
1429
1430    if (tgsi.tokens != stfp->tgsi.tokens)
1431       tgsi_free_tokens(tgsi.tokens);
1432    return variant;
1433 }
1434
1435 /**
1436  * Translate fragment program if needed.
1437  */
1438 struct st_fp_variant *
1439 st_get_fp_variant(struct st_context *st,
1440                   struct st_fragment_program *stfp,
1441                   const struct st_fp_variant_key *key)
1442 {
1443    struct st_fp_variant *fpv;
1444
1445    /* Search for existing variant */
1446    for (fpv = stfp->variants; fpv; fpv = fpv->next) {
1447       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1448          break;
1449       }
1450    }
1451
1452    if (!fpv) {
1453       /* create new */
1454       fpv = st_create_fp_variant(st, stfp, key);
1455       if (fpv) {
1456          if (key->bitmap || key->drawpixels) {
1457             /* Regular variants should always come before the
1458              * bitmap & drawpixels variants, (unless there
1459              * are no regular variants) so that
1460              * st_update_fp can take a fast path when
1461              * shader_has_one_variant is set.
1462              */
1463             if (!stfp->variants) {
1464                stfp->variants = fpv;
1465             } else {
1466                /* insert into list after the first one */
1467                fpv->next = stfp->variants->next;
1468                stfp->variants->next = fpv;
1469             }
1470          } else {
1471             /* insert into list */
1472             fpv->next = stfp->variants;
1473             stfp->variants = fpv;
1474          }
1475       }
1476    }
1477
1478    return fpv;
1479 }
1480
1481 /**
1482  * Translate a program. This is common code for geometry and tessellation
1483  * shaders.
1484  */
1485 bool
1486 st_translate_common_program(struct st_context *st,
1487                             struct st_common_program *stcp)
1488 {
1489    struct gl_program *prog = &stcp->Base;
1490    enum pipe_shader_type stage =
1491       pipe_shader_type_from_mesa(stcp->Base.info.stage);
1492    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1493
1494    if (ureg == NULL)
1495       return false;
1496
1497    switch (stage) {
1498    case PIPE_SHADER_TESS_CTRL:
1499       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1500                     stcp->Base.info.tess.tcs_vertices_out);
1501       break;
1502
1503    case PIPE_SHADER_TESS_EVAL:
1504       if (stcp->Base.info.tess.primitive_mode == GL_ISOLINES)
1505          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1506       else
1507          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1508                        stcp->Base.info.tess.primitive_mode);
1509
1510       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1511       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1512                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1513       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1514                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1515
1516       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1517                     (stcp->Base.info.tess.spacing + 1) % 3);
1518
1519       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1520                     !stcp->Base.info.tess.ccw);
1521       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1522                     stcp->Base.info.tess.point_mode);
1523       break;
1524
1525    case PIPE_SHADER_GEOMETRY:
1526       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1527                     stcp->Base.info.gs.input_primitive);
1528       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1529                     stcp->Base.info.gs.output_primitive);
1530       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1531                     stcp->Base.info.gs.vertices_out);
1532       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1533                     stcp->Base.info.gs.invocations);
1534       break;
1535
1536    default:
1537       break;
1538    }
1539
1540    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1541    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1542    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1543    GLuint attr;
1544
1545    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1546    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1547    uint num_inputs = 0;
1548
1549    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1550    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1551    uint num_outputs = 0;
1552
1553    GLint i;
1554
1555    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1556    memset(inputMapping, 0, sizeof(inputMapping));
1557    memset(outputMapping, 0, sizeof(outputMapping));
1558    memset(&stcp->tgsi, 0, sizeof(stcp->tgsi));
1559
1560    if (prog->info.clip_distance_array_size)
1561       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1562                     prog->info.clip_distance_array_size);
1563    if (prog->info.cull_distance_array_size)
1564       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1565                     prog->info.cull_distance_array_size);
1566
1567    /*
1568     * Convert Mesa program inputs to TGSI input register semantics.
1569     */
1570    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1571       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1572          continue;
1573
1574       unsigned slot = num_inputs++;
1575
1576       inputMapping[attr] = slot;
1577       inputSlotToAttr[slot] = attr;
1578
1579       unsigned semantic_name, semantic_index;
1580       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1581                                    &semantic_name, &semantic_index);
1582       input_semantic_name[slot] = semantic_name;
1583       input_semantic_index[slot] = semantic_index;
1584    }
1585
1586    /* Also add patch inputs. */
1587    for (attr = 0; attr < 32; attr++) {
1588       if (prog->info.patch_inputs_read & (1u << attr)) {
1589          GLuint slot = num_inputs++;
1590          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1591
1592          inputMapping[patch_attr] = slot;
1593          inputSlotToAttr[slot] = patch_attr;
1594          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1595          input_semantic_index[slot] = attr;
1596       }
1597    }
1598
1599    /* initialize output semantics to defaults */
1600    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1601       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1602       output_semantic_index[i] = 0;
1603    }
1604
1605    /*
1606     * Determine number of outputs, the (default) output register
1607     * mapping and the semantic information for each output.
1608     */
1609    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1610       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1611          GLuint slot = num_outputs++;
1612
1613          outputMapping[attr] = slot;
1614
1615          unsigned semantic_name, semantic_index;
1616          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1617                                       &semantic_name, &semantic_index);
1618          output_semantic_name[slot] = semantic_name;
1619          output_semantic_index[slot] = semantic_index;
1620       }
1621    }
1622
1623    /* Also add patch outputs. */
1624    for (attr = 0; attr < 32; attr++) {
1625       if (prog->info.patch_outputs_written & (1u << attr)) {
1626          GLuint slot = num_outputs++;
1627          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1628
1629          outputMapping[patch_attr] = slot;
1630          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1631          output_semantic_index[slot] = attr;
1632       }
1633    }
1634
1635    st_translate_program(st->ctx,
1636                         stage,
1637                         ureg,
1638                         stcp->glsl_to_tgsi,
1639                         prog,
1640                         /* inputs */
1641                         num_inputs,
1642                         inputMapping,
1643                         inputSlotToAttr,
1644                         input_semantic_name,
1645                         input_semantic_index,
1646                         NULL,
1647                         /* outputs */
1648                         num_outputs,
1649                         outputMapping,
1650                         output_semantic_name,
1651                         output_semantic_index);
1652
1653    stcp->tgsi.tokens = ureg_get_tokens(ureg, &stcp->num_tgsi_tokens);
1654
1655    ureg_destroy(ureg);
1656
1657    st_translate_stream_output_info(prog);
1658
1659    st_store_ir_in_disk_cache(st, prog, false);
1660
1661    if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) {
1662       _mesa_print_program(prog);
1663       debug_printf("\n");
1664    }
1665
1666    if (ST_DEBUG & DEBUG_TGSI) {
1667       tgsi_dump(stcp->tgsi.tokens, 0);
1668       debug_printf("\n");
1669    }
1670
1671    free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi);
1672    stcp->glsl_to_tgsi = NULL;
1673    return true;
1674 }
1675
1676
1677 /**
1678  * Get/create a basic program variant.
1679  */
1680 struct st_basic_variant *
1681 st_get_basic_variant(struct st_context *st,
1682                      struct st_common_program *prog,
1683                      const struct st_basic_variant_key *key)
1684 {
1685    struct pipe_context *pipe = st->pipe;
1686    struct st_basic_variant *v;
1687    struct pipe_shader_state tgsi = {0};
1688
1689    /* Search for existing variant */
1690    for (v = prog->variants; v; v = v->next) {
1691       if (memcmp(&v->key, key, sizeof(*key)) == 0) {
1692          break;
1693       }
1694    }
1695
1696    if (!v) {
1697       /* create new */
1698       v = CALLOC_STRUCT(st_basic_variant);
1699       if (v) {
1700
1701          if (prog->tgsi.type == PIPE_SHADER_IR_NIR) {
1702             tgsi.type = PIPE_SHADER_IR_NIR;
1703             tgsi.ir.nir = nir_shader_clone(NULL, prog->tgsi.ir.nir);
1704
1705             if (key->clamp_color)
1706                NIR_PASS_V(tgsi.ir.nir, nir_lower_clamp_color_outputs);
1707
1708             tgsi.stream_output = prog->tgsi.stream_output;
1709
1710             st_finalize_nir(st, &prog->Base, prog->shader_program,
1711                             tgsi.ir.nir);
1712          } else {
1713             if (key->lower_depth_clamp) {
1714                struct gl_program_parameter_list *params = prog->Base.Parameters;
1715
1716                unsigned depth_range_const =
1717                      _mesa_add_state_reference(params, depth_range_state);
1718
1719                const struct tgsi_token *tokens;
1720                tokens =
1721                      st_tgsi_lower_depth_clamp(prog->tgsi.tokens,
1722                                                depth_range_const,
1723                                                key->clip_negative_one_to_one);
1724
1725                if (tokens != prog->tgsi.tokens)
1726                   tgsi_free_tokens(prog->tgsi.tokens);
1727
1728                prog->tgsi.tokens = tokens;
1729                prog->num_tgsi_tokens = tgsi_num_tokens(tokens);
1730             }
1731             tgsi = prog->tgsi;
1732          }
1733          /* fill in new variant */
1734          switch (prog->Base.info.stage) {
1735          case MESA_SHADER_TESS_CTRL:
1736             v->driver_shader = pipe->create_tcs_state(pipe, &tgsi);
1737             break;
1738          case MESA_SHADER_TESS_EVAL:
1739             v->driver_shader = pipe->create_tes_state(pipe, &tgsi);
1740             break;
1741          case MESA_SHADER_GEOMETRY:
1742             v->driver_shader = pipe->create_gs_state(pipe, &tgsi);
1743             break;
1744          case MESA_SHADER_COMPUTE: {
1745             struct pipe_compute_state cs = {0};
1746             cs.ir_type = tgsi.type;
1747             cs.req_local_mem = prog->Base.info.cs.shared_size;
1748
1749             if (tgsi.type == PIPE_SHADER_IR_NIR)
1750                cs.prog = tgsi.ir.nir;
1751             else
1752                cs.prog = tgsi.tokens;
1753
1754             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1755             break;
1756          }
1757          default:
1758             assert(!"unhandled shader type");
1759             free(v);
1760             return NULL;
1761          }
1762
1763          v->key = *key;
1764
1765          /* insert into list */
1766          v->next = prog->variants;
1767          prog->variants = v;
1768       }
1769    }
1770
1771    return v;
1772 }
1773
1774
1775 /**
1776  * Vert/Geom/Frag programs have per-context variants.  Free all the
1777  * variants attached to the given program which match the given context.
1778  */
1779 static void
1780 destroy_program_variants(struct st_context *st, struct gl_program *target)
1781 {
1782    if (!target || target == &_mesa_DummyProgram)
1783       return;
1784
1785    switch (target->Target) {
1786    case GL_VERTEX_PROGRAM_ARB:
1787       {
1788          struct st_vertex_program *stvp = (struct st_vertex_program *) target;
1789          struct st_vp_variant *vpv, **prevPtr = &stvp->variants;
1790
1791          for (vpv = stvp->variants; vpv; ) {
1792             struct st_vp_variant *next = vpv->next;
1793             if (vpv->key.st == st) {
1794                /* unlink from list */
1795                *prevPtr = next;
1796                /* destroy this variant */
1797                delete_vp_variant(st, vpv);
1798             }
1799             else {
1800                prevPtr = &vpv->next;
1801             }
1802             vpv = next;
1803          }
1804       }
1805       break;
1806    case GL_FRAGMENT_PROGRAM_ARB:
1807       {
1808          struct st_fragment_program *stfp =
1809             (struct st_fragment_program *) target;
1810          struct st_fp_variant *fpv, **prevPtr = &stfp->variants;
1811
1812          for (fpv = stfp->variants; fpv; ) {
1813             struct st_fp_variant *next = fpv->next;
1814             if (fpv->key.st == st) {
1815                /* unlink from list */
1816                *prevPtr = next;
1817                /* destroy this variant */
1818                delete_fp_variant(st, fpv);
1819             }
1820             else {
1821                prevPtr = &fpv->next;
1822             }
1823             fpv = next;
1824          }
1825       }
1826       break;
1827    case GL_GEOMETRY_PROGRAM_NV:
1828    case GL_TESS_CONTROL_PROGRAM_NV:
1829    case GL_TESS_EVALUATION_PROGRAM_NV:
1830    case GL_COMPUTE_PROGRAM_NV:
1831       {
1832          struct st_common_program *p = st_common_program(target);
1833          struct st_basic_variant *v, **prevPtr = &p->variants;
1834
1835          for (v = p->variants; v; ) {
1836             struct st_basic_variant *next = v->next;
1837             if (v->key.st == st) {
1838                /* unlink from list */
1839                *prevPtr = next;
1840                /* destroy this variant */
1841                delete_basic_variant(st, v, target->Target);
1842             }
1843             else {
1844                prevPtr = &v->next;
1845             }
1846             v = next;
1847          }
1848       }
1849       break;
1850    default:
1851       _mesa_problem(NULL, "Unexpected program target 0x%x in "
1852                     "destroy_program_variants_cb()", target->Target);
1853    }
1854 }
1855
1856
1857 /**
1858  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1859  * which match the given context.
1860  */
1861 static void
1862 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1863 {
1864    struct st_context *st = (struct st_context *) userData;
1865    struct gl_shader *shader = (struct gl_shader *) data;
1866
1867    switch (shader->Type) {
1868    case GL_SHADER_PROGRAM_MESA:
1869       {
1870          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1871          GLuint i;
1872
1873          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1874             if (shProg->_LinkedShaders[i])
1875                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1876          }
1877       }
1878       break;
1879    case GL_VERTEX_SHADER:
1880    case GL_FRAGMENT_SHADER:
1881    case GL_GEOMETRY_SHADER:
1882    case GL_TESS_CONTROL_SHADER:
1883    case GL_TESS_EVALUATION_SHADER:
1884    case GL_COMPUTE_SHADER:
1885       break;
1886    default:
1887       assert(0);
1888    }
1889 }
1890
1891
1892 /**
1893  * Callback for _mesa_HashWalk.  Free all the program variants which match
1894  * the given context.
1895  */
1896 static void
1897 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1898 {
1899    struct st_context *st = (struct st_context *) userData;
1900    struct gl_program *program = (struct gl_program *) data;
1901    destroy_program_variants(st, program);
1902 }
1903
1904
1905 /**
1906  * Walk over all shaders and programs to delete any variants which
1907  * belong to the given context.
1908  * This is called during context tear-down.
1909  */
1910 void
1911 st_destroy_program_variants(struct st_context *st)
1912 {
1913    /* If shaders can be shared with other contexts, the last context will
1914     * call DeleteProgram on all shaders, releasing everything.
1915     */
1916    if (st->has_shareable_shaders)
1917       return;
1918
1919    /* ARB vert/frag program */
1920    _mesa_HashWalk(st->ctx->Shared->Programs,
1921                   destroy_program_variants_cb, st);
1922
1923    /* GLSL vert/frag/geom shaders */
1924    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1925                   destroy_shader_program_variants_cb, st);
1926 }
1927
1928
1929 /**
1930  * For debugging, print/dump the current vertex program.
1931  */
1932 void
1933 st_print_current_vertex_program(void)
1934 {
1935    GET_CURRENT_CONTEXT(ctx);
1936
1937    if (ctx->VertexProgram._Current) {
1938       struct st_vertex_program *stvp =
1939          (struct st_vertex_program *) ctx->VertexProgram._Current;
1940       struct st_vp_variant *stv;
1941
1942       debug_printf("Vertex program %u\n", stvp->Base.Id);
1943
1944       for (stv = stvp->variants; stv; stv = stv->next) {
1945          debug_printf("variant %p\n", stv);
1946          tgsi_dump(stv->tgsi.tokens, 0);
1947       }
1948    }
1949 }
1950
1951
1952 /**
1953  * Compile one shader variant.
1954  */
1955 void
1956 st_precompile_shader_variant(struct st_context *st,
1957                              struct gl_program *prog)
1958 {
1959    switch (prog->Target) {
1960    case GL_VERTEX_PROGRAM_ARB: {
1961       struct st_vertex_program *p = (struct st_vertex_program *)prog;
1962       struct st_vp_variant_key key;
1963
1964       memset(&key, 0, sizeof(key));
1965
1966       key.st = st->has_shareable_shaders ? NULL : st;
1967       st_get_vp_variant(st, p, &key);
1968       break;
1969    }
1970
1971    case GL_FRAGMENT_PROGRAM_ARB: {
1972       struct st_fragment_program *p = (struct st_fragment_program *)prog;
1973       struct st_fp_variant_key key;
1974
1975       memset(&key, 0, sizeof(key));
1976
1977       key.st = st->has_shareable_shaders ? NULL : st;
1978       st_get_fp_variant(st, p, &key);
1979       break;
1980    }
1981
1982    case GL_TESS_CONTROL_PROGRAM_NV:
1983    case GL_TESS_EVALUATION_PROGRAM_NV:
1984    case GL_GEOMETRY_PROGRAM_NV:
1985    case GL_COMPUTE_PROGRAM_NV: {
1986       struct st_common_program *p = st_common_program(prog);
1987       struct st_basic_variant_key key;
1988
1989       memset(&key, 0, sizeof(key));
1990
1991       key.st = st->has_shareable_shaders ? NULL : st;
1992       st_get_basic_variant(st, p, &key);
1993       break;
1994    }
1995
1996    default:
1997       assert(0);
1998    }
1999 }