src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35 #include "main/imports.h"
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44
  45 #include "pipe/p_context.h"
  46 #include "pipe/p_defines.h"
  47 #include "pipe/p_shader_tokens.h"
  48 #include "draw/draw_context.h"
  49 #include "tgsi/tgsi_dump.h"
  50 #include "tgsi/tgsi_emulate.h"
  51 #include "tgsi/tgsi_parse.h"
  52 #include "tgsi/tgsi_ureg.h"
  53
  54 #include "st_debug.h"
  55 #include "st_cb_bitmap.h"
  56 #include "st_cb_drawpixels.h"
  57 #include "st_context.h"
  58 #include "st_tgsi_lower_depth_clamp.h"
  59 #include "st_tgsi_lower_yuv.h"
  60 #include "st_program.h"
  61 #include "st_mesa_to_tgsi.h"
  62 #include "st_atifs_to_tgsi.h"
  63 #include "st_nir.h"
  64 #include "st_shader_cache.h"
  65 #include "cso_cache/cso_context.h"
  66
  67
  68
  69 static void
  70 set_affected_state_flags(uint64_t *states,
  71                          struct gl_program *prog,
  72                          uint64_t new_constants,
  73                          uint64_t new_sampler_views,
  74                          uint64_t new_samplers,
  75                          uint64_t new_images,
  76                          uint64_t new_ubos,
  77                          uint64_t new_ssbos,
  78                          uint64_t new_atomics)
  79 {
  80    if (prog->Parameters->NumParameters)
  81       *states |= new_constants;
  82
  83    if (prog->info.num_textures)
  84       *states |= new_sampler_views | new_samplers;
  85
  86    if (prog->info.num_images)
  87       *states |= new_images;
  88
  89    if (prog->info.num_ubos)
  90       *states |= new_ubos;
  91
  92    if (prog->info.num_ssbos)
  93       *states |= new_ssbos;
  94
  95    if (prog->info.num_abos)
  96       *states |= new_atomics;
  97 }
  98
  99 /**
 100  * This determines which states will be updated when the shader is bound.
 101  */
 102 void
 103 st_set_prog_affected_state_flags(struct gl_program *prog)
 104 {
 105    uint64_t *states;
 106
 107    switch (prog->info.stage) {
 108    case MESA_SHADER_VERTEX:
 109       states = &((struct st_vertex_program*)prog)->affected_states;
 110
 111       *states = ST_NEW_VS_STATE |
 112                 ST_NEW_RASTERIZER |
 113                 ST_NEW_VERTEX_ARRAYS;
 114
 115       set_affected_state_flags(states, prog,
 116                                ST_NEW_VS_CONSTANTS,
 117                                ST_NEW_VS_SAMPLER_VIEWS,
 118                                ST_NEW_VS_SAMPLERS,
 119                                ST_NEW_VS_IMAGES,
 120                                ST_NEW_VS_UBOS,
 121                                ST_NEW_VS_SSBOS,
 122                                ST_NEW_VS_ATOMICS);
 123       break;
 124
 125    case MESA_SHADER_TESS_CTRL:
 126       states = &(st_common_program(prog))->affected_states;
 127
 128       *states = ST_NEW_TCS_STATE;
 129
 130       set_affected_state_flags(states, prog,
 131                                ST_NEW_TCS_CONSTANTS,
 132                                ST_NEW_TCS_SAMPLER_VIEWS,
 133                                ST_NEW_TCS_SAMPLERS,
 134                                ST_NEW_TCS_IMAGES,
 135                                ST_NEW_TCS_UBOS,
 136                                ST_NEW_TCS_SSBOS,
 137                                ST_NEW_TCS_ATOMICS);
 138       break;
 139
 140    case MESA_SHADER_TESS_EVAL:
 141       states = &(st_common_program(prog))->affected_states;
 142
 143       *states = ST_NEW_TES_STATE |
 144                 ST_NEW_RASTERIZER;
 145
 146       set_affected_state_flags(states, prog,
 147                                ST_NEW_TES_CONSTANTS,
 148                                ST_NEW_TES_SAMPLER_VIEWS,
 149                                ST_NEW_TES_SAMPLERS,
 150                                ST_NEW_TES_IMAGES,
 151                                ST_NEW_TES_UBOS,
 152                                ST_NEW_TES_SSBOS,
 153                                ST_NEW_TES_ATOMICS);
 154       break;
 155
 156    case MESA_SHADER_GEOMETRY:
 157       states = &(st_common_program(prog))->affected_states;
 158
 159       *states = ST_NEW_GS_STATE |
 160                 ST_NEW_RASTERIZER;
 161
 162       set_affected_state_flags(states, prog,
 163                                ST_NEW_GS_CONSTANTS,
 164                                ST_NEW_GS_SAMPLER_VIEWS,
 165                                ST_NEW_GS_SAMPLERS,
 166                                ST_NEW_GS_IMAGES,
 167                                ST_NEW_GS_UBOS,
 168                                ST_NEW_GS_SSBOS,
 169                                ST_NEW_GS_ATOMICS);
 170       break;
 171
 172    case MESA_SHADER_FRAGMENT:
 173       states = &((struct st_common_program*)prog)->affected_states;
 174
 175       /* gl_FragCoord and glDrawPixels always use constants. */
 176       *states = ST_NEW_FS_STATE |
 177                 ST_NEW_SAMPLE_SHADING |
 178                 ST_NEW_FS_CONSTANTS;
 179
 180       set_affected_state_flags(states, prog,
 181                                ST_NEW_FS_CONSTANTS,
 182                                ST_NEW_FS_SAMPLER_VIEWS,
 183                                ST_NEW_FS_SAMPLERS,
 184                                ST_NEW_FS_IMAGES,
 185                                ST_NEW_FS_UBOS,
 186                                ST_NEW_FS_SSBOS,
 187                                ST_NEW_FS_ATOMICS);
 188       break;
 189
 190    case MESA_SHADER_COMPUTE:
 191       states = &((struct st_common_program*)prog)->affected_states;
 192
 193       *states = ST_NEW_CS_STATE;
 194
 195       set_affected_state_flags(states, prog,
 196                                ST_NEW_CS_CONSTANTS,
 197                                ST_NEW_CS_SAMPLER_VIEWS,
 198                                ST_NEW_CS_SAMPLERS,
 199                                ST_NEW_CS_IMAGES,
 200                                ST_NEW_CS_UBOS,
 201                                ST_NEW_CS_SSBOS,
 202                                ST_NEW_CS_ATOMICS);
 203       break;
 204
 205    default:
 206       unreachable("unhandled shader stage");
 207    }
 208 }
 209
 210 static void
 211 delete_ir(struct pipe_shader_state *ir)
 212 {
 213    if (ir->tokens) {
 214       ureg_free_tokens(ir->tokens);
 215       ir->tokens = NULL;
 216    }
 217
 218    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 219     * it has resulted in the driver taking ownership of the NIR.  Those
 220     * callers should be NULLing out the nir field in any pipe_shader_state
 221     * that might have this called in order to indicate that.
 222     *
 223     * GLSL IR and ARB programs will have set gl_program->nir to the same
 224     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 225     */
 226 }
 227
 228 /**
 229  * Delete a vertex program variant.  Note the caller must unlink
 230  * the variant from the linked list.
 231  */
 232 static void
 233 delete_vp_variant(struct st_context *st, struct st_vp_variant *vpv)
 234 {
 235    if (vpv->driver_shader) {
 236       if (st->has_shareable_shaders || vpv->key.st == st) {
 237          cso_delete_vertex_shader(st->cso_context, vpv->driver_shader);
 238       } else {
 239          st_save_zombie_shader(vpv->key.st, PIPE_SHADER_VERTEX,
 240                                vpv->driver_shader);
 241       }
 242    }
 243
 244    if (vpv->draw_shader)
 245       draw_delete_vertex_shader( st->draw, vpv->draw_shader );
 246
 247    if (vpv->tokens)
 248       ureg_free_tokens(vpv->tokens);
 249
 250    free( vpv );
 251 }
 252
 253
 254
 255 /**
 256  * Clean out any old compilations:
 257  */
 258 void
 259 st_release_vp_variants( struct st_context *st,
 260                         struct st_vertex_program *stvp )
 261 {
 262    struct st_vp_variant *vpv;
 263
 264    for (vpv = stvp->variants; vpv; ) {
 265       struct st_vp_variant *next = vpv->next;
 266       delete_vp_variant(st, vpv);
 267       vpv = next;
 268    }
 269
 270    stvp->variants = NULL;
 271
 272    delete_ir(&stvp->state);
 273 }
 274
 275
 276
 277 /**
 278  * Delete a fragment program variant.  Note the caller must unlink
 279  * the variant from the linked list.
 280  */
 281 static void
 282 delete_fp_variant(struct st_context *st, struct st_fp_variant *fpv)
 283 {
 284    if (fpv->driver_shader) {
 285       if (st->has_shareable_shaders || fpv->key.st == st) {
 286          cso_delete_fragment_shader(st->cso_context, fpv->driver_shader);
 287       } else {
 288          st_save_zombie_shader(fpv->key.st, PIPE_SHADER_FRAGMENT,
 289                                fpv->driver_shader);
 290       }
 291    }
 292
 293    free(fpv);
 294 }
 295
 296
 297 /**
 298  * Free all variants of a fragment program.
 299  */
 300 void
 301 st_release_fp_variants(struct st_context *st, struct st_common_program *stfp)
 302 {
 303    struct st_fp_variant *fpv;
 304
 305    for (fpv = stfp->fp_variants; fpv; ) {
 306       struct st_fp_variant *next = fpv->next;
 307       delete_fp_variant(st, fpv);
 308       fpv = next;
 309    }
 310
 311    stfp->fp_variants = NULL;
 312
 313    delete_ir(&stfp->state);
 314 }
 315
 316
 317 /**
 318  * Delete a basic program variant.  Note the caller must unlink
 319  * the variant from the linked list.
 320  */
 321 static void
 322 delete_basic_variant(struct st_context *st, struct st_common_variant *v,
 323                      GLenum target)
 324 {
 325    if (v->driver_shader) {
 326       if (st->has_shareable_shaders || v->key.st == st) {
 327          /* The shader's context matches the calling context, or we
 328           * don't care.
 329           */
 330          switch (target) {
 331          case GL_TESS_CONTROL_PROGRAM_NV:
 332             cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
 333             break;
 334          case GL_TESS_EVALUATION_PROGRAM_NV:
 335             cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
 336             break;
 337          case GL_GEOMETRY_PROGRAM_NV:
 338             cso_delete_geometry_shader(st->cso_context, v->driver_shader);
 339             break;
 340          case GL_COMPUTE_PROGRAM_NV:
 341             cso_delete_compute_shader(st->cso_context, v->driver_shader);
 342             break;
 343          default:
 344             unreachable("bad shader type in delete_basic_variant");
 345          }
 346       } else {
 347          /* We can't delete a shader with a context different from the one
 348           * that created it.  Add it to the creating context's zombie list.
 349           */
 350          enum pipe_shader_type type;
 351          switch (target) {
 352          case GL_TESS_CONTROL_PROGRAM_NV:
 353             type = PIPE_SHADER_TESS_CTRL;
 354             break;
 355          case GL_TESS_EVALUATION_PROGRAM_NV:
 356             type = PIPE_SHADER_TESS_EVAL;
 357             break;
 358          case GL_GEOMETRY_PROGRAM_NV:
 359             type = PIPE_SHADER_GEOMETRY;
 360             break;
 361          default:
 362             unreachable("");
 363          }
 364          st_save_zombie_shader(v->key.st, type, v->driver_shader);
 365       }
 366    }
 367
 368    free(v);
 369 }
 370
 371
 372 /**
 373  * Free all basic program variants.
 374  */
 375 void
 376 st_release_common_variants(struct st_context *st, struct st_common_program *p)
 377 {
 378    struct st_common_variant *v;
 379
 380    for (v = p->variants; v; ) {
 381       struct st_common_variant *next = v->next;
 382       delete_basic_variant(st, v, p->Base.Target);
 383       v = next;
 384    }
 385
 386    p->variants = NULL;
 387    delete_ir(&p->state);
 388 }
 389
 390 void
 391 st_finalize_nir_before_variants(struct nir_shader *nir)
 392 {
 393    NIR_PASS_V(nir, nir_split_var_copies);
 394    NIR_PASS_V(nir, nir_lower_var_copies);
 395    if (nir->options->lower_all_io_to_temps ||
 396        nir->options->lower_all_io_to_elements ||
 397        nir->info.stage == MESA_SHADER_VERTEX ||
 398        nir->info.stage == MESA_SHADER_GEOMETRY) {
 399       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 400    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 401       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 402    }
 403
 404    st_nir_assign_vs_in_locations(nir);
 405 }
 406
 407 /**
 408  * Translate ARB (asm) program to NIR
 409  */
 410 static nir_shader *
 411 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 412                          gl_shader_stage stage)
 413 {
 414    const struct gl_shader_compiler_options *options =
 415       &st->ctx->Const.ShaderCompilerOptions[stage];
 416
 417    /* Translate to NIR */
 418    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 419    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 420    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 421
 422    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, st->pipe->screen);
 423    NIR_PASS_V(nir, nir_lower_system_values);
 424
 425    /* Optimise NIR */
 426    NIR_PASS_V(nir, nir_opt_constant_folding);
 427    st_nir_opts(nir);
 428    st_finalize_nir_before_variants(nir);
 429    nir_validate_shader(nir, "after st/ptn NIR opts");
 430
 431    return nir;
 432 }
 433
 434 void
 435 st_prepare_vertex_program(struct st_vertex_program *stvp)
 436 {
 437    stvp->num_inputs = 0;
 438    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 439    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 440
 441    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 442     * and TGSI generic input indexes, plus input attrib semantic info.
 443     */
 444    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 445       if ((stvp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 446          stvp->input_to_index[attr] = stvp->num_inputs;
 447          stvp->index_to_input[stvp->num_inputs] = attr;
 448          stvp->num_inputs++;
 449
 450          if ((stvp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 451             /* add placeholder for second part of a double attribute */
 452             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 453             stvp->num_inputs++;
 454          }
 455       }
 456    }
 457    /* pre-setup potentially unused edgeflag input */
 458    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 459    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 460
 461    /* Compute mapping of vertex program outputs to slots. */
 462    unsigned num_outputs = 0;
 463    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 464       if (stvp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 465          stvp->result_to_output[attr] = num_outputs++;
 466    }
 467    /* pre-setup potentially unused edgeflag output */
 468    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 469 }
 470
 471 void
 472 st_translate_stream_output_info(struct gl_program *prog)
 473 {
 474    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 475    if (!info)
 476       return;
 477
 478    /* Determine the (default) output register mapping for each output. */
 479    unsigned num_outputs = 0;
 480    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 481    memset(output_mapping, 0, sizeof(output_mapping));
 482
 483    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 484       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 485          output_mapping[attr] = num_outputs++;
 486    }
 487
 488    /* Translate stream output info. */
 489    struct pipe_stream_output_info *so_info = NULL;
 490    if (prog->info.stage == MESA_SHADER_VERTEX)
 491       so_info = &((struct st_vertex_program*)prog)->state.stream_output;
 492    else
 493       so_info = &((struct st_common_program*)prog)->state.stream_output;
 494
 495    for (unsigned i = 0; i < info->NumOutputs; i++) {
 496       so_info->output[i].register_index =
 497          output_mapping[info->Outputs[i].OutputRegister];
 498       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 499       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 500       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 501       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 502       so_info->output[i].stream = info->Outputs[i].StreamId;
 503    }
 504
 505    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 506       so_info->stride[i] = info->Buffers[i].Stride;
 507    }
 508    so_info->num_outputs = info->NumOutputs;
 509 }
 510
 511 /**
 512  * Translate a vertex program.
 513  */
 514 bool
 515 st_translate_vertex_program(struct st_context *st,
 516                             struct st_vertex_program *stvp)
 517 {
 518    struct ureg_program *ureg;
 519    enum pipe_error error;
 520    unsigned num_outputs = 0;
 521    unsigned attr;
 522    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 523    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 524
 525    if (stvp->Base.arb.IsPositionInvariant)
 526       _mesa_insert_mvp_code(st->ctx, &stvp->Base);
 527
 528    st_prepare_vertex_program(stvp);
 529
 530    /* ARB_vp: */
 531    if (!stvp->glsl_to_tgsi) {
 532       _mesa_remove_output_reads(&stvp->Base, PROGRAM_OUTPUT);
 533
 534       /* This determines which states will be updated when the assembly
 535        * shader is bound.
 536        */
 537       stvp->affected_states = ST_NEW_VS_STATE |
 538                               ST_NEW_RASTERIZER |
 539                               ST_NEW_VERTEX_ARRAYS;
 540
 541       if (stvp->Base.Parameters->NumParameters)
 542          stvp->affected_states |= ST_NEW_VS_CONSTANTS;
 543
 544       /* No samplers are allowed in ARB_vp. */
 545    }
 546
 547    /* Get semantic names and indices. */
 548    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 549       if (stvp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 550          unsigned slot = num_outputs++;
 551          unsigned semantic_name, semantic_index;
 552          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 553                                       &semantic_name, &semantic_index);
 554          output_semantic_name[slot] = semantic_name;
 555          output_semantic_index[slot] = semantic_index;
 556       }
 557    }
 558    /* pre-setup potentially unused edgeflag output */
 559    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 560    output_semantic_index[num_outputs] = 0;
 561
 562    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 563    if (ureg == NULL)
 564       return false;
 565
 566    if (stvp->Base.info.clip_distance_array_size)
 567       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 568                     stvp->Base.info.clip_distance_array_size);
 569    if (stvp->Base.info.cull_distance_array_size)
 570       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 571                     stvp->Base.info.cull_distance_array_size);
 572
 573    if (ST_DEBUG & DEBUG_MESA) {
 574       _mesa_print_program(&stvp->Base);
 575       _mesa_print_program_parameters(st->ctx, &stvp->Base);
 576       debug_printf("\n");
 577    }
 578
 579    if (stvp->glsl_to_tgsi) {
 580       error = st_translate_program(st->ctx,
 581                                    PIPE_SHADER_VERTEX,
 582                                    ureg,
 583                                    stvp->glsl_to_tgsi,
 584                                    &stvp->Base,
 585                                    /* inputs */
 586                                    stvp->num_inputs,
 587                                    stvp->input_to_index,
 588                                    NULL, /* inputSlotToAttr */
 589                                    NULL, /* input semantic name */
 590                                    NULL, /* input semantic index */
 591                                    NULL, /* interp mode */
 592                                    /* outputs */
 593                                    num_outputs,
 594                                    stvp->result_to_output,
 595                                    output_semantic_name,
 596                                    output_semantic_index);
 597
 598       st_translate_stream_output_info(&stvp->Base);
 599
 600       free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
 601    } else
 602       error = st_translate_mesa_program(st->ctx,
 603                                         PIPE_SHADER_VERTEX,
 604                                         ureg,
 605                                         &stvp->Base,
 606                                         /* inputs */
 607                                         stvp->num_inputs,
 608                                         stvp->input_to_index,
 609                                         NULL, /* input semantic name */
 610                                         NULL, /* input semantic index */
 611                                         NULL,
 612                                         /* outputs */
 613                                         num_outputs,
 614                                         stvp->result_to_output,
 615                                         output_semantic_name,
 616                                         output_semantic_index);
 617
 618    if (error) {
 619       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 620       _mesa_print_program(&stvp->Base);
 621       debug_assert(0);
 622       return false;
 623    }
 624
 625    stvp->state.tokens = ureg_get_tokens(ureg, NULL);
 626    ureg_destroy(ureg);
 627
 628    if (stvp->glsl_to_tgsi) {
 629       stvp->glsl_to_tgsi = NULL;
 630       st_store_ir_in_disk_cache(st, &stvp->Base, false);
 631    }
 632
 633    /* Translate to NIR.
 634     *
 635     * This must be done after the translation to TGSI is done, because
 636     * we'll pass the NIR shader to the driver and the TGSI version to
 637     * the draw module for the select/feedback/rasterpos code.
 638     */
 639    if (st->pipe->screen->get_shader_param(st->pipe->screen,
 640                                           PIPE_SHADER_VERTEX,
 641                                           PIPE_SHADER_CAP_PREFERRED_IR)) {
 642       assert(!stvp->glsl_to_tgsi);
 643
 644       nir_shader *nir =
 645          st_translate_prog_to_nir(st, &stvp->Base, MESA_SHADER_VERTEX);
 646
 647       if (stvp->state.ir.nir)
 648          ralloc_free(stvp->state.ir.nir);
 649       stvp->state.type = PIPE_SHADER_IR_NIR;
 650       stvp->state.ir.nir = nir;
 651       stvp->Base.nir = nir;
 652       return true;
 653    }
 654
 655    return stvp->state.tokens != NULL;
 656 }
 657
 658 static const gl_state_index16 depth_range_state[STATE_LENGTH] =
 659    { STATE_DEPTH_RANGE };
 660
 661 static struct st_vp_variant *
 662 st_create_vp_variant(struct st_context *st,
 663                      struct st_vertex_program *stvp,
 664                      const struct st_common_variant_key *key)
 665 {
 666    struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
 667    struct pipe_context *pipe = st->pipe;
 668    struct pipe_shader_state state = {0};
 669
 670    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 671       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 672    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 673
 674    vpv->key = *key;
 675    vpv->num_inputs = stvp->num_inputs;
 676
 677    state.stream_output = stvp->state.stream_output;
 678
 679    if (stvp->state.type == PIPE_SHADER_IR_NIR) {
 680       state.type = PIPE_SHADER_IR_NIR;
 681       state.ir.nir = nir_shader_clone(NULL, stvp->state.ir.nir);
 682       if (key->clamp_color)
 683          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 684       if (key->passthrough_edgeflags) {
 685          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 686          vpv->num_inputs++;
 687       }
 688
 689       if (key->lower_point_size) {
 690          _mesa_add_state_reference(params, point_size_state);
 691          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 692                     point_size_state);
 693       }
 694
 695       if (key->lower_ucp) {
 696          struct pipe_screen *screen = pipe->screen;
 697          bool can_compact = screen->get_param(screen,
 698                                               PIPE_CAP_NIR_COMPACT_ARRAYS);
 699
 700          bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 701          gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 702          for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 703             if (use_eye) {
 704                clipplane_state[i][0] = STATE_CLIPPLANE;
 705                clipplane_state[i][1] = i;
 706             } else {
 707                clipplane_state[i][0] = STATE_INTERNAL;
 708                clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 709                clipplane_state[i][2] = i;
 710             }
 711             _mesa_add_state_reference(params, clipplane_state[i]);
 712          }
 713
 714          NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
 715                     true, can_compact, clipplane_state);
 716          NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
 717                     nir_shader_get_entrypoint(state.ir.nir), true, false);
 718       }
 719
 720       st_finalize_nir(st, &stvp->Base, stvp->shader_program,
 721                       state.ir.nir);
 722
 723       /* Some of the lowering above may have introduced new varyings */
 724       nir_shader_gather_info(state.ir.nir,
 725                              nir_shader_get_entrypoint(state.ir.nir));
 726
 727       vpv->driver_shader = pipe->create_vs_state(pipe, &state);
 728
 729       /* When generating a NIR program, we usually don't have TGSI tokens.
 730        * However, we do create them for ARB_vertex_program / fixed-function VS
 731        * programs which we may need to use with the draw module for legacy
 732        * feedback/select emulation.  If they exist, copy them.
 733        *
 734        * TODO: Lowering for shader variants is not applied to TGSI when
 735        * generating a NIR shader.
 736        */
 737       if (stvp->state.tokens)
 738          vpv->tokens = tgsi_dup_tokens(stvp->state.tokens);
 739
 740       return vpv;
 741    }
 742
 743    state.type = PIPE_SHADER_IR_TGSI;
 744    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 745
 746    /* Emulate features. */
 747    if (key->clamp_color || key->passthrough_edgeflags) {
 748       const struct tgsi_token *tokens;
 749       unsigned flags =
 750          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 751          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 752
 753       tokens = tgsi_emulate(state.tokens, flags);
 754
 755       if (tokens) {
 756          tgsi_free_tokens(state.tokens);
 757          state.tokens = tokens;
 758
 759          if (key->passthrough_edgeflags)
 760             vpv->num_inputs++;
 761       } else
 762          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 763    }
 764
 765    if (key->lower_depth_clamp) {
 766       unsigned depth_range_const =
 767             _mesa_add_state_reference(params, depth_range_state);
 768
 769       const struct tgsi_token *tokens;
 770       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 771                                          key->clip_negative_one_to_one);
 772       if (tokens != state.tokens)
 773          tgsi_free_tokens(state.tokens);
 774       state.tokens = tokens;
 775    }
 776
 777    if (ST_DEBUG & DEBUG_TGSI) {
 778       tgsi_dump(state.tokens, 0);
 779       debug_printf("\n");
 780    }
 781
 782    vpv->driver_shader = pipe->create_vs_state(pipe, &state);
 783    /* Save this for selection/feedback/rasterpos. */
 784    vpv->tokens = state.tokens;
 785    return vpv;
 786 }
 787
 788
 789 /**
 790  * Find/create a vertex program variant.
 791  */
 792 struct st_vp_variant *
 793 st_get_vp_variant(struct st_context *st,
 794                   struct st_vertex_program *stvp,
 795                   const struct st_common_variant_key *key)
 796 {
 797    struct st_vp_variant *vpv;
 798
 799    /* Search for existing variant */
 800    for (vpv = stvp->variants; vpv; vpv = vpv->next) {
 801       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 802          break;
 803       }
 804    }
 805
 806    if (!vpv) {
 807       /* create now */
 808       vpv = st_create_vp_variant(st, stvp, key);
 809       if (vpv) {
 810           for (unsigned index = 0; index < vpv->num_inputs; ++index) {
 811              unsigned attr = stvp->index_to_input[index];
 812              if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 813                 continue;
 814              vpv->vert_attrib_mask |= 1u << attr;
 815           }
 816
 817          /* insert into list */
 818          vpv->next = stvp->variants;
 819          stvp->variants = vpv;
 820       }
 821    }
 822
 823    return vpv;
 824 }
 825
 826
 827 /**
 828  * Translate a Mesa fragment shader into a TGSI shader.
 829  */
 830 bool
 831 st_translate_fragment_program(struct st_context *st,
 832                               struct st_common_program *stfp)
 833 {
 834    /* Non-GLSL programs: */
 835    if (!stfp->glsl_to_tgsi) {
 836       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 837       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 838          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 839
 840       /* This determines which states will be updated when the assembly
 841        * shader is bound.
 842        *
 843        * fragment.position and glDrawPixels always use constants.
 844        */
 845       stfp->affected_states = ST_NEW_FS_STATE |
 846                               ST_NEW_SAMPLE_SHADING |
 847                               ST_NEW_FS_CONSTANTS;
 848
 849       if (stfp->ati_fs) {
 850          /* Just set them for ATI_fs unconditionally. */
 851          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 852                                   ST_NEW_FS_SAMPLERS;
 853       } else {
 854          /* ARB_fp */
 855          if (stfp->Base.SamplersUsed)
 856             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 857                                      ST_NEW_FS_SAMPLERS;
 858       }
 859
 860       /* Translate to NIR. */
 861       if (!stfp->ati_fs &&
 862           st->pipe->screen->get_shader_param(st->pipe->screen,
 863                                              PIPE_SHADER_FRAGMENT,
 864                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 865          nir_shader *nir =
 866             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 867
 868          if (stfp->state.ir.nir)
 869             ralloc_free(stfp->state.ir.nir);
 870          stfp->state.type = PIPE_SHADER_IR_NIR;
 871          stfp->state.ir.nir = nir;
 872          stfp->Base.nir = nir;
 873          return true;
 874       }
 875    }
 876
 877    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 878    ubyte inputMapping[VARYING_SLOT_MAX];
 879    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 880    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 881    GLuint attr;
 882    GLbitfield64 inputsRead;
 883    struct ureg_program *ureg;
 884
 885    GLboolean write_all = GL_FALSE;
 886
 887    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 888    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 889    uint fs_num_inputs = 0;
 890
 891    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 892    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 893    uint fs_num_outputs = 0;
 894
 895    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 896
 897    /*
 898     * Convert Mesa program inputs to TGSI input register semantics.
 899     */
 900    inputsRead = stfp->Base.info.inputs_read;
 901    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 902       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 903          const GLuint slot = fs_num_inputs++;
 904
 905          inputMapping[attr] = slot;
 906          inputSlotToAttr[slot] = attr;
 907
 908          switch (attr) {
 909          case VARYING_SLOT_POS:
 910             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 911             input_semantic_index[slot] = 0;
 912             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 913             break;
 914          case VARYING_SLOT_COL0:
 915             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 916             input_semantic_index[slot] = 0;
 917             interpMode[slot] = stfp->glsl_to_tgsi ?
 918                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 919             break;
 920          case VARYING_SLOT_COL1:
 921             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 922             input_semantic_index[slot] = 1;
 923             interpMode[slot] = stfp->glsl_to_tgsi ?
 924                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 925             break;
 926          case VARYING_SLOT_FOGC:
 927             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 928             input_semantic_index[slot] = 0;
 929             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 930             break;
 931          case VARYING_SLOT_FACE:
 932             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 933             input_semantic_index[slot] = 0;
 934             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 935             break;
 936          case VARYING_SLOT_PRIMITIVE_ID:
 937             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 938             input_semantic_index[slot] = 0;
 939             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 940             break;
 941          case VARYING_SLOT_LAYER:
 942             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 943             input_semantic_index[slot] = 0;
 944             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 945             break;
 946          case VARYING_SLOT_VIEWPORT:
 947             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 948             input_semantic_index[slot] = 0;
 949             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 950             break;
 951          case VARYING_SLOT_CLIP_DIST0:
 952             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 953             input_semantic_index[slot] = 0;
 954             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 955             break;
 956          case VARYING_SLOT_CLIP_DIST1:
 957             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 958             input_semantic_index[slot] = 1;
 959             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 960             break;
 961          case VARYING_SLOT_CULL_DIST0:
 962          case VARYING_SLOT_CULL_DIST1:
 963             /* these should have been lowered by GLSL */
 964             assert(0);
 965             break;
 966             /* In most cases, there is nothing special about these
 967              * inputs, so adopt a convention to use the generic
 968              * semantic name and the mesa VARYING_SLOT_ number as the
 969              * index.
 970              *
 971              * All that is required is that the vertex shader labels
 972              * its own outputs similarly, and that the vertex shader
 973              * generates at least every output required by the
 974              * fragment shader plus fixed-function hardware (such as
 975              * BFC).
 976              *
 977              * However, some drivers may need us to identify the PNTC and TEXi
 978              * varyings if, for example, their capability to replace them with
 979              * sprite coordinates is limited.
 980              */
 981          case VARYING_SLOT_PNTC:
 982             if (st->needs_texcoord_semantic) {
 983                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
 984                input_semantic_index[slot] = 0;
 985                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 986                break;
 987             }
 988             /* fall through */
 989          case VARYING_SLOT_TEX0:
 990          case VARYING_SLOT_TEX1:
 991          case VARYING_SLOT_TEX2:
 992          case VARYING_SLOT_TEX3:
 993          case VARYING_SLOT_TEX4:
 994          case VARYING_SLOT_TEX5:
 995          case VARYING_SLOT_TEX6:
 996          case VARYING_SLOT_TEX7:
 997             if (st->needs_texcoord_semantic) {
 998                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
 999                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
1000                interpMode[slot] = stfp->glsl_to_tgsi ?
1001                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1002                break;
1003             }
1004             /* fall through */
1005          case VARYING_SLOT_VAR0:
1006          default:
1007             /* Semantic indices should be zero-based because drivers may choose
1008              * to assign a fixed slot determined by that index.
1009              * This is useful because ARB_separate_shader_objects uses location
1010              * qualifiers for linkage, and if the semantic index corresponds to
1011              * these locations, linkage passes in the driver become unecessary.
1012              *
1013              * If needs_texcoord_semantic is true, no semantic indices will be
1014              * consumed for the TEXi varyings, and we can base the locations of
1015              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
1016              */
1017             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
1018                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
1019             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
1020             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
1021             if (attr == VARYING_SLOT_PNTC)
1022                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1023             else {
1024                interpMode[slot] = stfp->glsl_to_tgsi ?
1025                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1026             }
1027             break;
1028          }
1029       }
1030       else {
1031          inputMapping[attr] = -1;
1032       }
1033    }
1034
1035    /*
1036     * Semantics and mapping for outputs
1037     */
1038    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
1039
1040    /* if z is written, emit that first */
1041    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
1042       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
1043       fs_output_semantic_index[fs_num_outputs] = 0;
1044       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
1045       fs_num_outputs++;
1046       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
1047    }
1048
1049    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
1050       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
1051       fs_output_semantic_index[fs_num_outputs] = 0;
1052       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
1053       fs_num_outputs++;
1054       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
1055    }
1056
1057    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
1058       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
1059       fs_output_semantic_index[fs_num_outputs] = 0;
1060       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
1061       fs_num_outputs++;
1062       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
1063    }
1064
1065    /* handle remaining outputs (color) */
1066    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
1067       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
1068          stfp->Base.SecondaryOutputsWritten;
1069       const unsigned loc = attr % FRAG_RESULT_MAX;
1070
1071       if (written & BITFIELD64_BIT(loc)) {
1072          switch (loc) {
1073          case FRAG_RESULT_DEPTH:
1074          case FRAG_RESULT_STENCIL:
1075          case FRAG_RESULT_SAMPLE_MASK:
1076             /* handled above */
1077             assert(0);
1078             break;
1079          case FRAG_RESULT_COLOR:
1080             write_all = GL_TRUE; /* fallthrough */
1081          default: {
1082             int index;
1083             assert(loc == FRAG_RESULT_COLOR ||
1084                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1085
1086             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1087
1088             if (attr >= FRAG_RESULT_MAX) {
1089                /* Secondary color for dual source blending. */
1090                assert(index == 0);
1091                index++;
1092             }
1093
1094             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1095             fs_output_semantic_index[fs_num_outputs] = index;
1096             outputMapping[attr] = fs_num_outputs;
1097             break;
1098          }
1099          }
1100
1101          fs_num_outputs++;
1102       }
1103    }
1104
1105    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1106    if (ureg == NULL)
1107       return false;
1108
1109    if (ST_DEBUG & DEBUG_MESA) {
1110       _mesa_print_program(&stfp->Base);
1111       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1112       debug_printf("\n");
1113    }
1114    if (write_all == GL_TRUE)
1115       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1116
1117    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1118       switch (stfp->Base.info.fs.depth_layout) {
1119       case FRAG_DEPTH_LAYOUT_ANY:
1120          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1121                        TGSI_FS_DEPTH_LAYOUT_ANY);
1122          break;
1123       case FRAG_DEPTH_LAYOUT_GREATER:
1124          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1125                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1126          break;
1127       case FRAG_DEPTH_LAYOUT_LESS:
1128          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1129                        TGSI_FS_DEPTH_LAYOUT_LESS);
1130          break;
1131       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1132          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1133                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1134          break;
1135       default:
1136          assert(0);
1137       }
1138    }
1139
1140    if (stfp->glsl_to_tgsi) {
1141       st_translate_program(st->ctx,
1142                            PIPE_SHADER_FRAGMENT,
1143                            ureg,
1144                            stfp->glsl_to_tgsi,
1145                            &stfp->Base,
1146                            /* inputs */
1147                            fs_num_inputs,
1148                            inputMapping,
1149                            inputSlotToAttr,
1150                            input_semantic_name,
1151                            input_semantic_index,
1152                            interpMode,
1153                            /* outputs */
1154                            fs_num_outputs,
1155                            outputMapping,
1156                            fs_output_semantic_name,
1157                            fs_output_semantic_index);
1158
1159       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1160    } else if (stfp->ati_fs)
1161       st_translate_atifs_program(ureg,
1162                                  stfp->ati_fs,
1163                                  &stfp->Base,
1164                                  /* inputs */
1165                                  fs_num_inputs,
1166                                  inputMapping,
1167                                  input_semantic_name,
1168                                  input_semantic_index,
1169                                  interpMode,
1170                                  /* outputs */
1171                                  fs_num_outputs,
1172                                  outputMapping,
1173                                  fs_output_semantic_name,
1174                                  fs_output_semantic_index);
1175    else
1176       st_translate_mesa_program(st->ctx,
1177                                 PIPE_SHADER_FRAGMENT,
1178                                 ureg,
1179                                 &stfp->Base,
1180                                 /* inputs */
1181                                 fs_num_inputs,
1182                                 inputMapping,
1183                                 input_semantic_name,
1184                                 input_semantic_index,
1185                                 interpMode,
1186                                 /* outputs */
1187                                 fs_num_outputs,
1188                                 outputMapping,
1189                                 fs_output_semantic_name,
1190                                 fs_output_semantic_index);
1191
1192    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1193    ureg_destroy(ureg);
1194
1195    if (stfp->glsl_to_tgsi) {
1196       stfp->glsl_to_tgsi = NULL;
1197       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1198    }
1199
1200    return stfp->state.tokens != NULL;
1201 }
1202
1203 static struct st_fp_variant *
1204 st_create_fp_variant(struct st_context *st,
1205                      struct st_common_program *stfp,
1206                      const struct st_fp_variant_key *key)
1207 {
1208    struct pipe_context *pipe = st->pipe;
1209    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1210    struct pipe_shader_state state = {0};
1211    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1212    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1213       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1214    static const gl_state_index16 scale_state[STATE_LENGTH] =
1215       { STATE_INTERNAL, STATE_PT_SCALE };
1216    static const gl_state_index16 bias_state[STATE_LENGTH] =
1217       { STATE_INTERNAL, STATE_PT_BIAS };
1218    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1219       { STATE_INTERNAL, STATE_ALPHA_REF };
1220
1221    if (!variant)
1222       return NULL;
1223
1224    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1225       state.type = PIPE_SHADER_IR_NIR;
1226       state.ir.nir = nir_shader_clone(NULL, stfp->state.ir.nir);
1227
1228       if (key->clamp_color)
1229          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1230
1231       if (key->lower_flatshade)
1232          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1233
1234       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1235          _mesa_add_state_reference(params, alpha_ref_state);
1236          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1237                     false, alpha_ref_state);
1238       }
1239
1240       if (key->lower_two_sided_color)
1241          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
1242
1243       if (key->persample_shading) {
1244           nir_shader *shader = state.ir.nir;
1245           nir_foreach_variable(var, &shader->inputs)
1246              var->data.sample = true;
1247       }
1248
1249       assert(!(key->bitmap && key->drawpixels));
1250
1251       /* glBitmap */
1252       if (key->bitmap) {
1253          nir_lower_bitmap_options options = {0};
1254
1255          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1256          options.sampler = variant->bitmap_sampler;
1257          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1258
1259          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1260       }
1261
1262       /* glDrawPixels (color only) */
1263       if (key->drawpixels) {
1264          nir_lower_drawpixels_options options = {{0}};
1265          unsigned samplers_used = stfp->Base.SamplersUsed;
1266
1267          /* Find the first unused slot. */
1268          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1269          options.drawpix_sampler = variant->drawpix_sampler;
1270          samplers_used |= (1 << variant->drawpix_sampler);
1271
1272          options.pixel_maps = key->pixelMaps;
1273          if (key->pixelMaps) {
1274             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1275             options.pixelmap_sampler = variant->pixelmap_sampler;
1276          }
1277
1278          options.scale_and_bias = key->scaleAndBias;
1279          if (key->scaleAndBias) {
1280             _mesa_add_state_reference(params, scale_state);
1281             memcpy(options.scale_state_tokens, scale_state,
1282                    sizeof(options.scale_state_tokens));
1283             _mesa_add_state_reference(params, bias_state);
1284             memcpy(options.bias_state_tokens, bias_state,
1285                    sizeof(options.bias_state_tokens));
1286          }
1287
1288          _mesa_add_state_reference(params, texcoord_state);
1289          memcpy(options.texcoord_state_tokens, texcoord_state,
1290                 sizeof(options.texcoord_state_tokens));
1291
1292          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1293       }
1294
1295       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1296                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1297                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1298          nir_lower_tex_options options = {0};
1299          options.lower_y_uv_external = key->external.lower_nv12;
1300          options.lower_y_u_v_external = key->external.lower_iyuv;
1301          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1302          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1303          options.lower_ayuv_external = key->external.lower_ayuv;
1304          options.lower_xyuv_external = key->external.lower_xyuv;
1305          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1306       }
1307
1308       st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir);
1309
1310       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1311                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1312          /* This pass needs to happen *after* nir_lower_sampler */
1313          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1314                     ~stfp->Base.SamplersUsed,
1315                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1316                        key->external.lower_yx_xuxv,
1317                     key->external.lower_iyuv);
1318       }
1319
1320       /* Some of the lowering above may have introduced new varyings */
1321       nir_shader_gather_info(state.ir.nir,
1322                              nir_shader_get_entrypoint(state.ir.nir));
1323
1324       variant->driver_shader = pipe->create_fs_state(pipe, &state);
1325       variant->key = *key;
1326
1327       return variant;
1328    }
1329
1330    state.tokens = stfp->state.tokens;
1331
1332    assert(!(key->bitmap && key->drawpixels));
1333
1334    /* Fix texture targets and add fog for ATI_fs */
1335    if (stfp->ati_fs) {
1336       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1337
1338       if (tokens)
1339          state.tokens = tokens;
1340       else
1341          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1342    }
1343
1344    /* Emulate features. */
1345    if (key->clamp_color || key->persample_shading) {
1346       const struct tgsi_token *tokens;
1347       unsigned flags =
1348          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1349          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1350
1351       tokens = tgsi_emulate(state.tokens, flags);
1352
1353       if (tokens) {
1354          if (state.tokens != stfp->state.tokens)
1355             tgsi_free_tokens(state.tokens);
1356          state.tokens = tokens;
1357       } else
1358          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1359    }
1360
1361    /* glBitmap */
1362    if (key->bitmap) {
1363       const struct tgsi_token *tokens;
1364
1365       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1366
1367       tokens = st_get_bitmap_shader(state.tokens,
1368                                     st->internal_target,
1369                                     variant->bitmap_sampler,
1370                                     st->needs_texcoord_semantic,
1371                                     st->bitmap.tex_format ==
1372                                     PIPE_FORMAT_R8_UNORM);
1373
1374       if (tokens) {
1375          if (state.tokens != stfp->state.tokens)
1376             tgsi_free_tokens(state.tokens);
1377          state.tokens = tokens;
1378       } else
1379          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1380    }
1381
1382    /* glDrawPixels (color only) */
1383    if (key->drawpixels) {
1384       const struct tgsi_token *tokens;
1385       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1386
1387       /* Find the first unused slot. */
1388       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1389
1390       if (key->pixelMaps) {
1391          unsigned samplers_used = stfp->Base.SamplersUsed |
1392                                   (1 << variant->drawpix_sampler);
1393
1394          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1395       }
1396
1397       if (key->scaleAndBias) {
1398          scale_const = _mesa_add_state_reference(params, scale_state);
1399          bias_const = _mesa_add_state_reference(params, bias_state);
1400       }
1401
1402       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1403
1404       tokens = st_get_drawpix_shader(state.tokens,
1405                                      st->needs_texcoord_semantic,
1406                                      key->scaleAndBias, scale_const,
1407                                      bias_const, key->pixelMaps,
1408                                      variant->drawpix_sampler,
1409                                      variant->pixelmap_sampler,
1410                                      texcoord_const, st->internal_target);
1411
1412       if (tokens) {
1413          if (state.tokens != stfp->state.tokens)
1414             tgsi_free_tokens(state.tokens);
1415          state.tokens = tokens;
1416       } else
1417          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1418    }
1419
1420    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1421                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1422       const struct tgsi_token *tokens;
1423
1424       /* samplers inserted would conflict, but this should be unpossible: */
1425       assert(!(key->bitmap || key->drawpixels));
1426
1427       tokens = st_tgsi_lower_yuv(state.tokens,
1428                                  ~stfp->Base.SamplersUsed,
1429                                  key->external.lower_nv12 ||
1430                                     key->external.lower_xy_uxvx ||
1431                                     key->external.lower_yx_xuxv,
1432                                  key->external.lower_iyuv);
1433       if (tokens) {
1434          if (state.tokens != stfp->state.tokens)
1435             tgsi_free_tokens(state.tokens);
1436          state.tokens = tokens;
1437       } else {
1438          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1439       }
1440    }
1441
1442    if (key->lower_depth_clamp) {
1443       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1444
1445       const struct tgsi_token *tokens;
1446       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1447       if (state.tokens != stfp->state.tokens)
1448          tgsi_free_tokens(state.tokens);
1449       state.tokens = tokens;
1450    }
1451
1452    if (ST_DEBUG & DEBUG_TGSI) {
1453       tgsi_dump(state.tokens, 0);
1454       debug_printf("\n");
1455    }
1456
1457    /* fill in variant */
1458    variant->driver_shader = pipe->create_fs_state(pipe, &state);
1459    variant->key = *key;
1460
1461    if (state.tokens != stfp->state.tokens)
1462       tgsi_free_tokens(state.tokens);
1463    return variant;
1464 }
1465
1466 /**
1467  * Translate fragment program if needed.
1468  */
1469 struct st_fp_variant *
1470 st_get_fp_variant(struct st_context *st,
1471                   struct st_common_program *stfp,
1472                   const struct st_fp_variant_key *key)
1473 {
1474    struct st_fp_variant *fpv;
1475
1476    /* Search for existing variant */
1477    for (fpv = stfp->fp_variants; fpv; fpv = fpv->next) {
1478       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1479          break;
1480       }
1481    }
1482
1483    if (!fpv) {
1484       /* create new */
1485       fpv = st_create_fp_variant(st, stfp, key);
1486       if (fpv) {
1487          if (key->bitmap || key->drawpixels) {
1488             /* Regular variants should always come before the
1489              * bitmap & drawpixels variants, (unless there
1490              * are no regular variants) so that
1491              * st_update_fp can take a fast path when
1492              * shader_has_one_variant is set.
1493              */
1494             if (!stfp->fp_variants) {
1495                stfp->fp_variants = fpv;
1496             } else {
1497                /* insert into list after the first one */
1498                fpv->next = stfp->fp_variants->next;
1499                stfp->fp_variants->next = fpv;
1500             }
1501          } else {
1502             /* insert into list */
1503             fpv->next = stfp->fp_variants;
1504             stfp->fp_variants = fpv;
1505          }
1506       }
1507    }
1508
1509    return fpv;
1510 }
1511
1512 /**
1513  * Translate a program. This is common code for geometry and tessellation
1514  * shaders.
1515  */
1516 bool
1517 st_translate_common_program(struct st_context *st,
1518                             struct st_common_program *stcp)
1519 {
1520    struct gl_program *prog = &stcp->Base;
1521    enum pipe_shader_type stage =
1522       pipe_shader_type_from_mesa(stcp->Base.info.stage);
1523    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1524
1525    if (ureg == NULL)
1526       return false;
1527
1528    switch (stage) {
1529    case PIPE_SHADER_TESS_CTRL:
1530       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1531                     stcp->Base.info.tess.tcs_vertices_out);
1532       break;
1533
1534    case PIPE_SHADER_TESS_EVAL:
1535       if (stcp->Base.info.tess.primitive_mode == GL_ISOLINES)
1536          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1537       else
1538          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1539                        stcp->Base.info.tess.primitive_mode);
1540
1541       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1542       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1543                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1544       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1545                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1546
1547       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1548                     (stcp->Base.info.tess.spacing + 1) % 3);
1549
1550       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1551                     !stcp->Base.info.tess.ccw);
1552       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1553                     stcp->Base.info.tess.point_mode);
1554       break;
1555
1556    case PIPE_SHADER_GEOMETRY:
1557       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1558                     stcp->Base.info.gs.input_primitive);
1559       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1560                     stcp->Base.info.gs.output_primitive);
1561       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1562                     stcp->Base.info.gs.vertices_out);
1563       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1564                     stcp->Base.info.gs.invocations);
1565       break;
1566
1567    default:
1568       break;
1569    }
1570
1571    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1572    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1573    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1574    GLuint attr;
1575
1576    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1577    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1578    uint num_inputs = 0;
1579
1580    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1581    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1582    uint num_outputs = 0;
1583
1584    GLint i;
1585
1586    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1587    memset(inputMapping, 0, sizeof(inputMapping));
1588    memset(outputMapping, 0, sizeof(outputMapping));
1589    memset(&stcp->state, 0, sizeof(stcp->state));
1590
1591    if (prog->info.clip_distance_array_size)
1592       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1593                     prog->info.clip_distance_array_size);
1594    if (prog->info.cull_distance_array_size)
1595       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1596                     prog->info.cull_distance_array_size);
1597
1598    /*
1599     * Convert Mesa program inputs to TGSI input register semantics.
1600     */
1601    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1602       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1603          continue;
1604
1605       unsigned slot = num_inputs++;
1606
1607       inputMapping[attr] = slot;
1608       inputSlotToAttr[slot] = attr;
1609
1610       unsigned semantic_name, semantic_index;
1611       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1612                                    &semantic_name, &semantic_index);
1613       input_semantic_name[slot] = semantic_name;
1614       input_semantic_index[slot] = semantic_index;
1615    }
1616
1617    /* Also add patch inputs. */
1618    for (attr = 0; attr < 32; attr++) {
1619       if (prog->info.patch_inputs_read & (1u << attr)) {
1620          GLuint slot = num_inputs++;
1621          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1622
1623          inputMapping[patch_attr] = slot;
1624          inputSlotToAttr[slot] = patch_attr;
1625          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1626          input_semantic_index[slot] = attr;
1627       }
1628    }
1629
1630    /* initialize output semantics to defaults */
1631    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1632       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1633       output_semantic_index[i] = 0;
1634    }
1635
1636    /*
1637     * Determine number of outputs, the (default) output register
1638     * mapping and the semantic information for each output.
1639     */
1640    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1641       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1642          GLuint slot = num_outputs++;
1643
1644          outputMapping[attr] = slot;
1645
1646          unsigned semantic_name, semantic_index;
1647          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1648                                       &semantic_name, &semantic_index);
1649          output_semantic_name[slot] = semantic_name;
1650          output_semantic_index[slot] = semantic_index;
1651       }
1652    }
1653
1654    /* Also add patch outputs. */
1655    for (attr = 0; attr < 32; attr++) {
1656       if (prog->info.patch_outputs_written & (1u << attr)) {
1657          GLuint slot = num_outputs++;
1658          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1659
1660          outputMapping[patch_attr] = slot;
1661          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1662          output_semantic_index[slot] = attr;
1663       }
1664    }
1665
1666    st_translate_program(st->ctx,
1667                         stage,
1668                         ureg,
1669                         stcp->glsl_to_tgsi,
1670                         prog,
1671                         /* inputs */
1672                         num_inputs,
1673                         inputMapping,
1674                         inputSlotToAttr,
1675                         input_semantic_name,
1676                         input_semantic_index,
1677                         NULL,
1678                         /* outputs */
1679                         num_outputs,
1680                         outputMapping,
1681                         output_semantic_name,
1682                         output_semantic_index);
1683
1684    stcp->state.tokens = ureg_get_tokens(ureg, NULL);
1685
1686    ureg_destroy(ureg);
1687
1688    st_translate_stream_output_info(prog);
1689
1690    st_store_ir_in_disk_cache(st, prog, false);
1691
1692    if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) {
1693       _mesa_print_program(prog);
1694       debug_printf("\n");
1695    }
1696
1697    if (ST_DEBUG & DEBUG_TGSI) {
1698       tgsi_dump(stcp->state.tokens, 0);
1699       debug_printf("\n");
1700    }
1701
1702    free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi);
1703    stcp->glsl_to_tgsi = NULL;
1704    return true;
1705 }
1706
1707
1708 /**
1709  * Get/create a basic program variant.
1710  */
1711 struct st_common_variant *
1712 st_get_common_variant(struct st_context *st,
1713                       struct st_common_program *prog,
1714                       const struct st_common_variant_key *key)
1715 {
1716    struct pipe_context *pipe = st->pipe;
1717    struct st_common_variant *v;
1718    struct pipe_shader_state state = {0};
1719
1720    /* Search for existing variant */
1721    for (v = prog->variants; v; v = v->next) {
1722       if (memcmp(&v->key, key, sizeof(*key)) == 0) {
1723          break;
1724       }
1725    }
1726
1727    if (!v) {
1728       /* create new */
1729       v = CALLOC_STRUCT(st_common_variant);
1730       if (v) {
1731
1732          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1733             state.type = PIPE_SHADER_IR_NIR;
1734             state.ir.nir = nir_shader_clone(NULL, prog->state.ir.nir);
1735
1736             if (key->clamp_color)
1737                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1738
1739             state.stream_output = prog->state.stream_output;
1740
1741             st_finalize_nir(st, &prog->Base, prog->shader_program,
1742                             state.ir.nir);
1743          } else {
1744             if (key->lower_depth_clamp) {
1745                struct gl_program_parameter_list *params = prog->Base.Parameters;
1746
1747                unsigned depth_range_const =
1748                      _mesa_add_state_reference(params, depth_range_state);
1749
1750                const struct tgsi_token *tokens;
1751                tokens =
1752                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1753                                                depth_range_const,
1754                                                key->clip_negative_one_to_one);
1755
1756                if (tokens != prog->state.tokens)
1757                   tgsi_free_tokens(prog->state.tokens);
1758
1759                prog->state.tokens = tokens;
1760             }
1761             state = prog->state;
1762          }
1763          /* fill in new variant */
1764          switch (prog->Base.info.stage) {
1765          case MESA_SHADER_TESS_CTRL:
1766             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1767             break;
1768          case MESA_SHADER_TESS_EVAL:
1769             v->driver_shader = pipe->create_tes_state(pipe, &state);
1770             break;
1771          case MESA_SHADER_GEOMETRY:
1772             v->driver_shader = pipe->create_gs_state(pipe, &state);
1773             break;
1774          case MESA_SHADER_COMPUTE: {
1775             struct pipe_compute_state cs = {0};
1776             cs.ir_type = state.type;
1777             cs.req_local_mem = prog->Base.info.cs.shared_size;
1778
1779             if (state.type == PIPE_SHADER_IR_NIR)
1780                cs.prog = state.ir.nir;
1781             else
1782                cs.prog = state.tokens;
1783
1784             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1785             break;
1786          }
1787          default:
1788             assert(!"unhandled shader type");
1789             free(v);
1790             return NULL;
1791          }
1792
1793          v->key = *key;
1794
1795          /* insert into list */
1796          v->next = prog->variants;
1797          prog->variants = v;
1798       }
1799    }
1800
1801    return v;
1802 }
1803
1804
1805 /**
1806  * Vert/Geom/Frag programs have per-context variants.  Free all the
1807  * variants attached to the given program which match the given context.
1808  */
1809 static void
1810 destroy_program_variants(struct st_context *st, struct gl_program *target)
1811 {
1812    if (!target || target == &_mesa_DummyProgram)
1813       return;
1814
1815    switch (target->Target) {
1816    case GL_VERTEX_PROGRAM_ARB:
1817       {
1818          struct st_vertex_program *stvp = (struct st_vertex_program *) target;
1819          struct st_vp_variant *vpv, **prevPtr = &stvp->variants;
1820
1821          for (vpv = stvp->variants; vpv; ) {
1822             struct st_vp_variant *next = vpv->next;
1823             if (vpv->key.st == st) {
1824                /* unlink from list */
1825                *prevPtr = next;
1826                /* destroy this variant */
1827                delete_vp_variant(st, vpv);
1828             }
1829             else {
1830                prevPtr = &vpv->next;
1831             }
1832             vpv = next;
1833          }
1834       }
1835       break;
1836    case GL_FRAGMENT_PROGRAM_ARB:
1837       {
1838          struct st_common_program *stfp =
1839             (struct st_common_program *) target;
1840          struct st_fp_variant *fpv, **prevPtr = &stfp->fp_variants;
1841
1842          for (fpv = stfp->fp_variants; fpv; ) {
1843             struct st_fp_variant *next = fpv->next;
1844             if (fpv->key.st == st) {
1845                /* unlink from list */
1846                *prevPtr = next;
1847                /* destroy this variant */
1848                delete_fp_variant(st, fpv);
1849             }
1850             else {
1851                prevPtr = &fpv->next;
1852             }
1853             fpv = next;
1854          }
1855       }
1856       break;
1857    case GL_GEOMETRY_PROGRAM_NV:
1858    case GL_TESS_CONTROL_PROGRAM_NV:
1859    case GL_TESS_EVALUATION_PROGRAM_NV:
1860    case GL_COMPUTE_PROGRAM_NV:
1861       {
1862          struct st_common_program *p = st_common_program(target);
1863          struct st_common_variant *v, **prevPtr = &p->variants;
1864
1865          for (v = p->variants; v; ) {
1866             struct st_common_variant *next = v->next;
1867             if (v->key.st == st) {
1868                /* unlink from list */
1869                *prevPtr = next;
1870                /* destroy this variant */
1871                delete_basic_variant(st, v, target->Target);
1872             }
1873             else {
1874                prevPtr = &v->next;
1875             }
1876             v = next;
1877          }
1878       }
1879       break;
1880    default:
1881       _mesa_problem(NULL, "Unexpected program target 0x%x in "
1882                     "destroy_program_variants_cb()", target->Target);
1883    }
1884 }
1885
1886
1887 /**
1888  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1889  * which match the given context.
1890  */
1891 static void
1892 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1893 {
1894    struct st_context *st = (struct st_context *) userData;
1895    struct gl_shader *shader = (struct gl_shader *) data;
1896
1897    switch (shader->Type) {
1898    case GL_SHADER_PROGRAM_MESA:
1899       {
1900          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1901          GLuint i;
1902
1903          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1904             if (shProg->_LinkedShaders[i])
1905                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1906          }
1907       }
1908       break;
1909    case GL_VERTEX_SHADER:
1910    case GL_FRAGMENT_SHADER:
1911    case GL_GEOMETRY_SHADER:
1912    case GL_TESS_CONTROL_SHADER:
1913    case GL_TESS_EVALUATION_SHADER:
1914    case GL_COMPUTE_SHADER:
1915       break;
1916    default:
1917       assert(0);
1918    }
1919 }
1920
1921
1922 /**
1923  * Callback for _mesa_HashWalk.  Free all the program variants which match
1924  * the given context.
1925  */
1926 static void
1927 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1928 {
1929    struct st_context *st = (struct st_context *) userData;
1930    struct gl_program *program = (struct gl_program *) data;
1931    destroy_program_variants(st, program);
1932 }
1933
1934
1935 /**
1936  * Walk over all shaders and programs to delete any variants which
1937  * belong to the given context.
1938  * This is called during context tear-down.
1939  */
1940 void
1941 st_destroy_program_variants(struct st_context *st)
1942 {
1943    /* If shaders can be shared with other contexts, the last context will
1944     * call DeleteProgram on all shaders, releasing everything.
1945     */
1946    if (st->has_shareable_shaders)
1947       return;
1948
1949    /* ARB vert/frag program */
1950    _mesa_HashWalk(st->ctx->Shared->Programs,
1951                   destroy_program_variants_cb, st);
1952
1953    /* GLSL vert/frag/geom shaders */
1954    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1955                   destroy_shader_program_variants_cb, st);
1956 }
1957
1958
1959 /**
1960  * For debugging, print/dump the current vertex program.
1961  */
1962 void
1963 st_print_current_vertex_program(void)
1964 {
1965    GET_CURRENT_CONTEXT(ctx);
1966
1967    if (ctx->VertexProgram._Current) {
1968       struct st_vertex_program *stvp =
1969          (struct st_vertex_program *) ctx->VertexProgram._Current;
1970       struct st_vp_variant *stv;
1971
1972       debug_printf("Vertex program %u\n", stvp->Base.Id);
1973
1974       for (stv = stvp->variants; stv; stv = stv->next) {
1975          debug_printf("variant %p\n", stv);
1976          tgsi_dump(stv->tokens, 0);
1977       }
1978    }
1979 }
1980
1981
1982 /**
1983  * Compile one shader variant.
1984  */
1985 void
1986 st_precompile_shader_variant(struct st_context *st,
1987                              struct gl_program *prog)
1988 {
1989    switch (prog->Target) {
1990    case GL_VERTEX_PROGRAM_ARB: {
1991       struct st_vertex_program *p = (struct st_vertex_program *)prog;
1992       struct st_common_variant_key key;
1993
1994       memset(&key, 0, sizeof(key));
1995
1996       key.st = st->has_shareable_shaders ? NULL : st;
1997       st_get_vp_variant(st, p, &key);
1998       break;
1999    }
2000
2001    case GL_FRAGMENT_PROGRAM_ARB: {
2002       struct st_common_program *p = (struct st_common_program *)prog;
2003       struct st_fp_variant_key key;
2004
2005       memset(&key, 0, sizeof(key));
2006
2007       key.st = st->has_shareable_shaders ? NULL : st;
2008       st_get_fp_variant(st, p, &key);
2009       break;
2010    }
2011
2012    case GL_TESS_CONTROL_PROGRAM_NV:
2013    case GL_TESS_EVALUATION_PROGRAM_NV:
2014    case GL_GEOMETRY_PROGRAM_NV:
2015    case GL_COMPUTE_PROGRAM_NV: {
2016       struct st_common_program *p = st_common_program(prog);
2017       struct st_common_variant_key key;
2018
2019       memset(&key, 0, sizeof(key));
2020
2021       key.st = st->has_shareable_shaders ? NULL : st;
2022       st_get_common_variant(st, p, &key);
2023       break;
2024    }
2025
2026    default:
2027       assert(0);
2028    }
2029 }