src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35 #include "main/imports.h"
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44
  45 #include "pipe/p_context.h"
  46 #include "pipe/p_defines.h"
  47 #include "pipe/p_shader_tokens.h"
  48 #include "draw/draw_context.h"
  49 #include "tgsi/tgsi_dump.h"
  50 #include "tgsi/tgsi_emulate.h"
  51 #include "tgsi/tgsi_parse.h"
  52 #include "tgsi/tgsi_ureg.h"
  53
  54 #include "st_debug.h"
  55 #include "st_cb_bitmap.h"
  56 #include "st_cb_drawpixels.h"
  57 #include "st_context.h"
  58 #include "st_tgsi_lower_depth_clamp.h"
  59 #include "st_tgsi_lower_yuv.h"
  60 #include "st_program.h"
  61 #include "st_mesa_to_tgsi.h"
  62 #include "st_atifs_to_tgsi.h"
  63 #include "st_nir.h"
  64 #include "st_shader_cache.h"
  65 #include "cso_cache/cso_context.h"
  66
  67
  68
  69 static void
  70 set_affected_state_flags(uint64_t *states,
  71                          struct gl_program *prog,
  72                          uint64_t new_constants,
  73                          uint64_t new_sampler_views,
  74                          uint64_t new_samplers,
  75                          uint64_t new_images,
  76                          uint64_t new_ubos,
  77                          uint64_t new_ssbos,
  78                          uint64_t new_atomics)
  79 {
  80    if (prog->Parameters->NumParameters)
  81       *states |= new_constants;
  82
  83    if (prog->info.num_textures)
  84       *states |= new_sampler_views | new_samplers;
  85
  86    if (prog->info.num_images)
  87       *states |= new_images;
  88
  89    if (prog->info.num_ubos)
  90       *states |= new_ubos;
  91
  92    if (prog->info.num_ssbos)
  93       *states |= new_ssbos;
  94
  95    if (prog->info.num_abos)
  96       *states |= new_atomics;
  97 }
  98
  99 /**
 100  * This determines which states will be updated when the shader is bound.
 101  */
 102 void
 103 st_set_prog_affected_state_flags(struct gl_program *prog)
 104 {
 105    uint64_t *states;
 106
 107    switch (prog->info.stage) {
 108    case MESA_SHADER_VERTEX:
 109       states = &((struct st_vertex_program*)prog)->affected_states;
 110
 111       *states = ST_NEW_VS_STATE |
 112                 ST_NEW_RASTERIZER |
 113                 ST_NEW_VERTEX_ARRAYS;
 114
 115       set_affected_state_flags(states, prog,
 116                                ST_NEW_VS_CONSTANTS,
 117                                ST_NEW_VS_SAMPLER_VIEWS,
 118                                ST_NEW_VS_SAMPLERS,
 119                                ST_NEW_VS_IMAGES,
 120                                ST_NEW_VS_UBOS,
 121                                ST_NEW_VS_SSBOS,
 122                                ST_NEW_VS_ATOMICS);
 123       break;
 124
 125    case MESA_SHADER_TESS_CTRL:
 126       states = &(st_common_program(prog))->affected_states;
 127
 128       *states = ST_NEW_TCS_STATE;
 129
 130       set_affected_state_flags(states, prog,
 131                                ST_NEW_TCS_CONSTANTS,
 132                                ST_NEW_TCS_SAMPLER_VIEWS,
 133                                ST_NEW_TCS_SAMPLERS,
 134                                ST_NEW_TCS_IMAGES,
 135                                ST_NEW_TCS_UBOS,
 136                                ST_NEW_TCS_SSBOS,
 137                                ST_NEW_TCS_ATOMICS);
 138       break;
 139
 140    case MESA_SHADER_TESS_EVAL:
 141       states = &(st_common_program(prog))->affected_states;
 142
 143       *states = ST_NEW_TES_STATE |
 144                 ST_NEW_RASTERIZER;
 145
 146       set_affected_state_flags(states, prog,
 147                                ST_NEW_TES_CONSTANTS,
 148                                ST_NEW_TES_SAMPLER_VIEWS,
 149                                ST_NEW_TES_SAMPLERS,
 150                                ST_NEW_TES_IMAGES,
 151                                ST_NEW_TES_UBOS,
 152                                ST_NEW_TES_SSBOS,
 153                                ST_NEW_TES_ATOMICS);
 154       break;
 155
 156    case MESA_SHADER_GEOMETRY:
 157       states = &(st_common_program(prog))->affected_states;
 158
 159       *states = ST_NEW_GS_STATE |
 160                 ST_NEW_RASTERIZER;
 161
 162       set_affected_state_flags(states, prog,
 163                                ST_NEW_GS_CONSTANTS,
 164                                ST_NEW_GS_SAMPLER_VIEWS,
 165                                ST_NEW_GS_SAMPLERS,
 166                                ST_NEW_GS_IMAGES,
 167                                ST_NEW_GS_UBOS,
 168                                ST_NEW_GS_SSBOS,
 169                                ST_NEW_GS_ATOMICS);
 170       break;
 171
 172    case MESA_SHADER_FRAGMENT:
 173       states = &((struct st_common_program*)prog)->affected_states;
 174
 175       /* gl_FragCoord and glDrawPixels always use constants. */
 176       *states = ST_NEW_FS_STATE |
 177                 ST_NEW_SAMPLE_SHADING |
 178                 ST_NEW_FS_CONSTANTS;
 179
 180       set_affected_state_flags(states, prog,
 181                                ST_NEW_FS_CONSTANTS,
 182                                ST_NEW_FS_SAMPLER_VIEWS,
 183                                ST_NEW_FS_SAMPLERS,
 184                                ST_NEW_FS_IMAGES,
 185                                ST_NEW_FS_UBOS,
 186                                ST_NEW_FS_SSBOS,
 187                                ST_NEW_FS_ATOMICS);
 188       break;
 189
 190    case MESA_SHADER_COMPUTE:
 191       states = &((struct st_common_program*)prog)->affected_states;
 192
 193       *states = ST_NEW_CS_STATE;
 194
 195       set_affected_state_flags(states, prog,
 196                                ST_NEW_CS_CONSTANTS,
 197                                ST_NEW_CS_SAMPLER_VIEWS,
 198                                ST_NEW_CS_SAMPLERS,
 199                                ST_NEW_CS_IMAGES,
 200                                ST_NEW_CS_UBOS,
 201                                ST_NEW_CS_SSBOS,
 202                                ST_NEW_CS_ATOMICS);
 203       break;
 204
 205    default:
 206       unreachable("unhandled shader stage");
 207    }
 208 }
 209
 210 static void
 211 delete_ir(struct pipe_shader_state *ir)
 212 {
 213    if (ir->tokens) {
 214       ureg_free_tokens(ir->tokens);
 215       ir->tokens = NULL;
 216    }
 217
 218    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 219     * it has resulted in the driver taking ownership of the NIR.  Those
 220     * callers should be NULLing out the nir field in any pipe_shader_state
 221     * that might have this called in order to indicate that.
 222     *
 223     * GLSL IR and ARB programs will have set gl_program->nir to the same
 224     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 225     */
 226 }
 227
 228 /**
 229  * Delete a vertex program variant.  Note the caller must unlink
 230  * the variant from the linked list.
 231  */
 232 static void
 233 delete_vp_variant(struct st_context *st, struct st_vp_variant *vpv)
 234 {
 235    if (vpv->driver_shader) {
 236       if (st->has_shareable_shaders || vpv->key.st == st) {
 237          cso_delete_vertex_shader(st->cso_context, vpv->driver_shader);
 238       } else {
 239          st_save_zombie_shader(vpv->key.st, PIPE_SHADER_VERTEX,
 240                                vpv->driver_shader);
 241       }
 242    }
 243
 244    if (vpv->draw_shader)
 245       draw_delete_vertex_shader( st->draw, vpv->draw_shader );
 246
 247    if (vpv->tokens)
 248       ureg_free_tokens(vpv->tokens);
 249
 250    free( vpv );
 251 }
 252
 253
 254
 255 /**
 256  * Clean out any old compilations:
 257  */
 258 void
 259 st_release_vp_variants( struct st_context *st,
 260                         struct st_vertex_program *stvp )
 261 {
 262    struct st_vp_variant *vpv;
 263
 264    for (vpv = stvp->variants; vpv; ) {
 265       struct st_vp_variant *next = vpv->next;
 266       delete_vp_variant(st, vpv);
 267       vpv = next;
 268    }
 269
 270    stvp->variants = NULL;
 271
 272    delete_ir(&stvp->state);
 273 }
 274
 275
 276
 277 /**
 278  * Delete a fragment program variant.  Note the caller must unlink
 279  * the variant from the linked list.
 280  */
 281 static void
 282 delete_fp_variant(struct st_context *st, struct st_fp_variant *fpv)
 283 {
 284    if (fpv->driver_shader) {
 285       if (st->has_shareable_shaders || fpv->key.st == st) {
 286          cso_delete_fragment_shader(st->cso_context, fpv->driver_shader);
 287       } else {
 288          st_save_zombie_shader(fpv->key.st, PIPE_SHADER_FRAGMENT,
 289                                fpv->driver_shader);
 290       }
 291    }
 292
 293    free(fpv);
 294 }
 295
 296
 297 /**
 298  * Free all variants of a fragment program.
 299  */
 300 void
 301 st_release_fp_variants(struct st_context *st, struct st_common_program *stfp)
 302 {
 303    struct st_fp_variant *fpv;
 304
 305    for (fpv = stfp->fp_variants; fpv; ) {
 306       struct st_fp_variant *next = fpv->next;
 307       delete_fp_variant(st, fpv);
 308       fpv = next;
 309    }
 310
 311    stfp->fp_variants = NULL;
 312
 313    delete_ir(&stfp->state);
 314 }
 315
 316
 317 /**
 318  * Delete a basic program variant.  Note the caller must unlink
 319  * the variant from the linked list.
 320  */
 321 static void
 322 delete_common_variant(struct st_context *st, struct st_common_variant *v,
 323                       GLenum target)
 324 {
 325    if (v->driver_shader) {
 326       if (st->has_shareable_shaders || v->key.st == st) {
 327          /* The shader's context matches the calling context, or we
 328           * don't care.
 329           */
 330          switch (target) {
 331          case GL_TESS_CONTROL_PROGRAM_NV:
 332             cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
 333             break;
 334          case GL_TESS_EVALUATION_PROGRAM_NV:
 335             cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
 336             break;
 337          case GL_GEOMETRY_PROGRAM_NV:
 338             cso_delete_geometry_shader(st->cso_context, v->driver_shader);
 339             break;
 340          case GL_COMPUTE_PROGRAM_NV:
 341             cso_delete_compute_shader(st->cso_context, v->driver_shader);
 342             break;
 343          default:
 344             unreachable("bad shader type in delete_basic_variant");
 345          }
 346       } else {
 347          /* We can't delete a shader with a context different from the one
 348           * that created it.  Add it to the creating context's zombie list.
 349           */
 350          enum pipe_shader_type type;
 351          switch (target) {
 352          case GL_TESS_CONTROL_PROGRAM_NV:
 353             type = PIPE_SHADER_TESS_CTRL;
 354             break;
 355          case GL_TESS_EVALUATION_PROGRAM_NV:
 356             type = PIPE_SHADER_TESS_EVAL;
 357             break;
 358          case GL_GEOMETRY_PROGRAM_NV:
 359             type = PIPE_SHADER_GEOMETRY;
 360             break;
 361          default:
 362             unreachable("");
 363          }
 364          st_save_zombie_shader(v->key.st, type, v->driver_shader);
 365       }
 366    }
 367
 368    free(v);
 369 }
 370
 371
 372 /**
 373  * Free all basic program variants.
 374  */
 375 void
 376 st_release_common_variants(struct st_context *st, struct st_common_program *p)
 377 {
 378    struct st_common_variant *v;
 379
 380    for (v = p->variants; v; ) {
 381       struct st_common_variant *next = v->next;
 382       delete_common_variant(st, v, p->Base.Target);
 383       v = next;
 384    }
 385
 386    p->variants = NULL;
 387    delete_ir(&p->state);
 388 }
 389
 390 void
 391 st_finalize_nir_before_variants(struct nir_shader *nir)
 392 {
 393    NIR_PASS_V(nir, nir_opt_access);
 394
 395    NIR_PASS_V(nir, nir_split_var_copies);
 396    NIR_PASS_V(nir, nir_lower_var_copies);
 397    if (nir->options->lower_all_io_to_temps ||
 398        nir->options->lower_all_io_to_elements ||
 399        nir->info.stage == MESA_SHADER_VERTEX ||
 400        nir->info.stage == MESA_SHADER_GEOMETRY) {
 401       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 402    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 403       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 404    }
 405
 406    st_nir_assign_vs_in_locations(nir);
 407 }
 408
 409 /**
 410  * Translate ARB (asm) program to NIR
 411  */
 412 static nir_shader *
 413 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 414                          gl_shader_stage stage)
 415 {
 416    struct pipe_screen *screen = st->pipe->screen;
 417    const struct gl_shader_compiler_options *options =
 418       &st->ctx->Const.ShaderCompilerOptions[stage];
 419
 420    /* Translate to NIR */
 421    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 422    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 423    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 424
 425    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
 426    NIR_PASS_V(nir, nir_lower_system_values);
 427
 428    /* Optimise NIR */
 429    NIR_PASS_V(nir, nir_opt_constant_folding);
 430    st_nir_opts(nir);
 431    st_finalize_nir_before_variants(nir);
 432
 433    if (st->allow_st_finalize_nir_twice)
 434       st_finalize_nir(st, prog, NULL, nir, true);
 435
 436    nir_validate_shader(nir, "after st/glsl finalize_nir");
 437
 438    return nir;
 439 }
 440
 441 void
 442 st_prepare_vertex_program(struct st_vertex_program *stvp)
 443 {
 444    stvp->num_inputs = 0;
 445    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 446    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 447
 448    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 449     * and TGSI generic input indexes, plus input attrib semantic info.
 450     */
 451    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 452       if ((stvp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 453          stvp->input_to_index[attr] = stvp->num_inputs;
 454          stvp->index_to_input[stvp->num_inputs] = attr;
 455          stvp->num_inputs++;
 456
 457          if ((stvp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 458             /* add placeholder for second part of a double attribute */
 459             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 460             stvp->num_inputs++;
 461          }
 462       }
 463    }
 464    /* pre-setup potentially unused edgeflag input */
 465    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 466    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 467
 468    /* Compute mapping of vertex program outputs to slots. */
 469    unsigned num_outputs = 0;
 470    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 471       if (stvp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 472          stvp->result_to_output[attr] = num_outputs++;
 473    }
 474    /* pre-setup potentially unused edgeflag output */
 475    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 476 }
 477
 478 void
 479 st_translate_stream_output_info(struct gl_program *prog)
 480 {
 481    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 482    if (!info)
 483       return;
 484
 485    /* Determine the (default) output register mapping for each output. */
 486    unsigned num_outputs = 0;
 487    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 488    memset(output_mapping, 0, sizeof(output_mapping));
 489
 490    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 491       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 492          output_mapping[attr] = num_outputs++;
 493    }
 494
 495    /* Translate stream output info. */
 496    struct pipe_stream_output_info *so_info = NULL;
 497    if (prog->info.stage == MESA_SHADER_VERTEX)
 498       so_info = &((struct st_vertex_program*)prog)->state.stream_output;
 499    else
 500       so_info = &((struct st_common_program*)prog)->state.stream_output;
 501
 502    for (unsigned i = 0; i < info->NumOutputs; i++) {
 503       so_info->output[i].register_index =
 504          output_mapping[info->Outputs[i].OutputRegister];
 505       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 506       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 507       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 508       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 509       so_info->output[i].stream = info->Outputs[i].StreamId;
 510    }
 511
 512    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 513       so_info->stride[i] = info->Buffers[i].Stride;
 514    }
 515    so_info->num_outputs = info->NumOutputs;
 516 }
 517
 518 /**
 519  * Translate a vertex program.
 520  */
 521 bool
 522 st_translate_vertex_program(struct st_context *st,
 523                             struct st_vertex_program *stvp)
 524 {
 525    struct ureg_program *ureg;
 526    enum pipe_error error;
 527    unsigned num_outputs = 0;
 528    unsigned attr;
 529    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 530    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 531
 532    if (stvp->Base.arb.IsPositionInvariant)
 533       _mesa_insert_mvp_code(st->ctx, &stvp->Base);
 534
 535    st_prepare_vertex_program(stvp);
 536
 537    /* ARB_vp: */
 538    if (!stvp->glsl_to_tgsi) {
 539       _mesa_remove_output_reads(&stvp->Base, PROGRAM_OUTPUT);
 540
 541       /* This determines which states will be updated when the assembly
 542        * shader is bound.
 543        */
 544       stvp->affected_states = ST_NEW_VS_STATE |
 545                               ST_NEW_RASTERIZER |
 546                               ST_NEW_VERTEX_ARRAYS;
 547
 548       if (stvp->Base.Parameters->NumParameters)
 549          stvp->affected_states |= ST_NEW_VS_CONSTANTS;
 550
 551       /* No samplers are allowed in ARB_vp. */
 552    }
 553
 554    /* Get semantic names and indices. */
 555    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 556       if (stvp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 557          unsigned slot = num_outputs++;
 558          unsigned semantic_name, semantic_index;
 559          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 560                                       &semantic_name, &semantic_index);
 561          output_semantic_name[slot] = semantic_name;
 562          output_semantic_index[slot] = semantic_index;
 563       }
 564    }
 565    /* pre-setup potentially unused edgeflag output */
 566    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 567    output_semantic_index[num_outputs] = 0;
 568
 569    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 570    if (ureg == NULL)
 571       return false;
 572
 573    if (stvp->Base.info.clip_distance_array_size)
 574       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 575                     stvp->Base.info.clip_distance_array_size);
 576    if (stvp->Base.info.cull_distance_array_size)
 577       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 578                     stvp->Base.info.cull_distance_array_size);
 579
 580    if (ST_DEBUG & DEBUG_MESA) {
 581       _mesa_print_program(&stvp->Base);
 582       _mesa_print_program_parameters(st->ctx, &stvp->Base);
 583       debug_printf("\n");
 584    }
 585
 586    if (stvp->glsl_to_tgsi) {
 587       error = st_translate_program(st->ctx,
 588                                    PIPE_SHADER_VERTEX,
 589                                    ureg,
 590                                    stvp->glsl_to_tgsi,
 591                                    &stvp->Base,
 592                                    /* inputs */
 593                                    stvp->num_inputs,
 594                                    stvp->input_to_index,
 595                                    NULL, /* inputSlotToAttr */
 596                                    NULL, /* input semantic name */
 597                                    NULL, /* input semantic index */
 598                                    NULL, /* interp mode */
 599                                    /* outputs */
 600                                    num_outputs,
 601                                    stvp->result_to_output,
 602                                    output_semantic_name,
 603                                    output_semantic_index);
 604
 605       st_translate_stream_output_info(&stvp->Base);
 606
 607       free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
 608    } else
 609       error = st_translate_mesa_program(st->ctx,
 610                                         PIPE_SHADER_VERTEX,
 611                                         ureg,
 612                                         &stvp->Base,
 613                                         /* inputs */
 614                                         stvp->num_inputs,
 615                                         stvp->input_to_index,
 616                                         NULL, /* input semantic name */
 617                                         NULL, /* input semantic index */
 618                                         NULL,
 619                                         /* outputs */
 620                                         num_outputs,
 621                                         stvp->result_to_output,
 622                                         output_semantic_name,
 623                                         output_semantic_index);
 624
 625    if (error) {
 626       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 627       _mesa_print_program(&stvp->Base);
 628       debug_assert(0);
 629       return false;
 630    }
 631
 632    stvp->state.tokens = ureg_get_tokens(ureg, NULL);
 633    ureg_destroy(ureg);
 634
 635    if (stvp->glsl_to_tgsi) {
 636       stvp->glsl_to_tgsi = NULL;
 637       st_store_ir_in_disk_cache(st, &stvp->Base, false);
 638    }
 639
 640    /* Translate to NIR.
 641     *
 642     * This must be done after the translation to TGSI is done, because
 643     * we'll pass the NIR shader to the driver and the TGSI version to
 644     * the draw module for the select/feedback/rasterpos code.
 645     */
 646    if (st->pipe->screen->get_shader_param(st->pipe->screen,
 647                                           PIPE_SHADER_VERTEX,
 648                                           PIPE_SHADER_CAP_PREFERRED_IR)) {
 649       assert(!stvp->glsl_to_tgsi);
 650
 651       nir_shader *nir =
 652          st_translate_prog_to_nir(st, &stvp->Base, MESA_SHADER_VERTEX);
 653
 654       if (stvp->state.ir.nir)
 655          ralloc_free(stvp->state.ir.nir);
 656       stvp->state.type = PIPE_SHADER_IR_NIR;
 657       stvp->state.ir.nir = nir;
 658       stvp->Base.nir = nir;
 659       return true;
 660    }
 661
 662    return stvp->state.tokens != NULL;
 663 }
 664
/* State reference naming STATE_DEPTH_RANGE; the remaining tokens are
 * zero-initialized.  Added to a program's parameter list when depth clamp
 * is lowered in the shader.
 */
static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };
 667
 668 static struct st_vp_variant *
 669 st_create_vp_variant(struct st_context *st,
 670                      struct st_vertex_program *stvp,
 671                      const struct st_common_variant_key *key)
 672 {
 673    struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
 674    struct pipe_context *pipe = st->pipe;
 675    struct pipe_screen *screen = pipe->screen;
 676    struct pipe_shader_state state = {0};
 677
 678    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 679       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 680    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 681
 682    vpv->key = *key;
 683    vpv->num_inputs = stvp->num_inputs;
 684
 685    state.stream_output = stvp->state.stream_output;
 686
 687    if (stvp->state.type == PIPE_SHADER_IR_NIR) {
 688       bool finalize = false;
 689
 690       state.type = PIPE_SHADER_IR_NIR;
 691       state.ir.nir = nir_shader_clone(NULL, stvp->state.ir.nir);
 692       if (key->clamp_color) {
 693          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 694          finalize = true;
 695       }
 696       if (key->passthrough_edgeflags) {
 697          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 698          vpv->num_inputs++;
 699          finalize = true;
 700       }
 701
 702       if (key->lower_point_size) {
 703          _mesa_add_state_reference(params, point_size_state);
 704          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 705                     point_size_state);
 706          finalize = true;
 707       }
 708
 709       if (key->lower_ucp) {
 710          bool can_compact = screen->get_param(screen,
 711                                               PIPE_CAP_NIR_COMPACT_ARRAYS);
 712
 713          bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 714          gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 715          for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 716             if (use_eye) {
 717                clipplane_state[i][0] = STATE_CLIPPLANE;
 718                clipplane_state[i][1] = i;
 719             } else {
 720                clipplane_state[i][0] = STATE_INTERNAL;
 721                clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 722                clipplane_state[i][2] = i;
 723             }
 724             _mesa_add_state_reference(params, clipplane_state[i]);
 725          }
 726
 727          NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
 728                     true, can_compact, clipplane_state);
 729          NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
 730                     nir_shader_get_entrypoint(state.ir.nir), true, false);
 731          NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
 732          finalize = true;
 733       }
 734
 735       if (finalize || !st->allow_st_finalize_nir_twice) {
 736          st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
 737                          true);
 738
 739          /* Some of the lowering above may have introduced new varyings */
 740          nir_shader_gather_info(state.ir.nir,
 741                                 nir_shader_get_entrypoint(state.ir.nir));
 742       }
 743
 744       if (ST_DEBUG & DEBUG_PRINT_IR)
 745          nir_print_shader(state.ir.nir, stderr);
 746
 747       vpv->driver_shader = pipe->create_vs_state(pipe, &state);
 748
 749       /* When generating a NIR program, we usually don't have TGSI tokens.
 750        * However, we do create them for ARB_vertex_program / fixed-function VS
 751        * programs which we may need to use with the draw module for legacy
 752        * feedback/select emulation.  If they exist, copy them.
 753        *
 754        * TODO: Lowering for shader variants is not applied to TGSI when
 755        * generating a NIR shader.
 756        */
 757       if (stvp->state.tokens)
 758          vpv->tokens = tgsi_dup_tokens(stvp->state.tokens);
 759
 760       return vpv;
 761    }
 762
 763    state.type = PIPE_SHADER_IR_TGSI;
 764    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 765
 766    /* Emulate features. */
 767    if (key->clamp_color || key->passthrough_edgeflags) {
 768       const struct tgsi_token *tokens;
 769       unsigned flags =
 770          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 771          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 772
 773       tokens = tgsi_emulate(state.tokens, flags);
 774
 775       if (tokens) {
 776          tgsi_free_tokens(state.tokens);
 777          state.tokens = tokens;
 778
 779          if (key->passthrough_edgeflags)
 780             vpv->num_inputs++;
 781       } else
 782          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 783    }
 784
 785    if (key->lower_depth_clamp) {
 786       unsigned depth_range_const =
 787             _mesa_add_state_reference(params, depth_range_state);
 788
 789       const struct tgsi_token *tokens;
 790       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 791                                          key->clip_negative_one_to_one);
 792       if (tokens != state.tokens)
 793          tgsi_free_tokens(state.tokens);
 794       state.tokens = tokens;
 795    }
 796
 797    if (ST_DEBUG & DEBUG_PRINT_IR)
 798       tgsi_dump(state.tokens, 0);
 799
 800    vpv->driver_shader = pipe->create_vs_state(pipe, &state);
 801    /* Save this for selection/feedback/rasterpos. */
 802    vpv->tokens = state.tokens;
 803    return vpv;
 804 }
 805
 806
 807 /**
 808  * Find/create a vertex program variant.
 809  */
 810 struct st_vp_variant *
 811 st_get_vp_variant(struct st_context *st,
 812                   struct st_vertex_program *stvp,
 813                   const struct st_common_variant_key *key)
 814 {
 815    struct st_vp_variant *vpv;
 816
 817    /* Search for existing variant */
 818    for (vpv = stvp->variants; vpv; vpv = vpv->next) {
 819       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 820          break;
 821       }
 822    }
 823
 824    if (!vpv) {
 825       /* create now */
 826       vpv = st_create_vp_variant(st, stvp, key);
 827       if (vpv) {
 828           for (unsigned index = 0; index < vpv->num_inputs; ++index) {
 829              unsigned attr = stvp->index_to_input[index];
 830              if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 831                 continue;
 832              vpv->vert_attrib_mask |= 1u << attr;
 833           }
 834
 835          /* insert into list */
 836          vpv->next = stvp->variants;
 837          stvp->variants = vpv;
 838       }
 839    }
 840
 841    return vpv;
 842 }
 843
 844
 845 /**
 846  * Translate a Mesa fragment shader into a TGSI shader.
 847  */
 848 bool
 849 st_translate_fragment_program(struct st_context *st,
 850                               struct st_common_program *stfp)
 851 {
 852    /* Non-GLSL programs: */
 853    if (!stfp->glsl_to_tgsi) {
 854       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 855       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 856          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 857
 858       /* This determines which states will be updated when the assembly
 859        * shader is bound.
 860        *
 861        * fragment.position and glDrawPixels always use constants.
 862        */
 863       stfp->affected_states = ST_NEW_FS_STATE |
 864                               ST_NEW_SAMPLE_SHADING |
 865                               ST_NEW_FS_CONSTANTS;
 866
 867       if (stfp->ati_fs) {
 868          /* Just set them for ATI_fs unconditionally. */
 869          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 870                                   ST_NEW_FS_SAMPLERS;
 871       } else {
 872          /* ARB_fp */
 873          if (stfp->Base.SamplersUsed)
 874             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 875                                      ST_NEW_FS_SAMPLERS;
 876       }
 877
 878       /* Translate to NIR. */
 879       if (!stfp->ati_fs &&
 880           st->pipe->screen->get_shader_param(st->pipe->screen,
 881                                              PIPE_SHADER_FRAGMENT,
 882                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 883          nir_shader *nir =
 884             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 885
 886          if (stfp->state.ir.nir)
 887             ralloc_free(stfp->state.ir.nir);
 888          stfp->state.type = PIPE_SHADER_IR_NIR;
 889          stfp->state.ir.nir = nir;
 890          stfp->Base.nir = nir;
 891          return true;
 892       }
 893    }
 894
 895    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 896    ubyte inputMapping[VARYING_SLOT_MAX];
 897    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 898    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 899    GLuint attr;
 900    GLbitfield64 inputsRead;
 901    struct ureg_program *ureg;
 902
 903    GLboolean write_all = GL_FALSE;
 904
 905    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 906    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 907    uint fs_num_inputs = 0;
 908
 909    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 910    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 911    uint fs_num_outputs = 0;
 912
 913    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 914
 915    /*
 916     * Convert Mesa program inputs to TGSI input register semantics.
 917     */
 918    inputsRead = stfp->Base.info.inputs_read;
 919    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 920       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 921          const GLuint slot = fs_num_inputs++;
 922
 923          inputMapping[attr] = slot;
 924          inputSlotToAttr[slot] = attr;
 925
 926          switch (attr) {
 927          case VARYING_SLOT_POS:
 928             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 929             input_semantic_index[slot] = 0;
 930             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 931             break;
 932          case VARYING_SLOT_COL0:
 933             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 934             input_semantic_index[slot] = 0;
 935             interpMode[slot] = stfp->glsl_to_tgsi ?
 936                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 937             break;
 938          case VARYING_SLOT_COL1:
 939             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 940             input_semantic_index[slot] = 1;
 941             interpMode[slot] = stfp->glsl_to_tgsi ?
 942                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 943             break;
 944          case VARYING_SLOT_FOGC:
 945             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 946             input_semantic_index[slot] = 0;
 947             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 948             break;
 949          case VARYING_SLOT_FACE:
 950             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 951             input_semantic_index[slot] = 0;
 952             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 953             break;
 954          case VARYING_SLOT_PRIMITIVE_ID:
 955             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 956             input_semantic_index[slot] = 0;
 957             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 958             break;
 959          case VARYING_SLOT_LAYER:
 960             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 961             input_semantic_index[slot] = 0;
 962             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 963             break;
 964          case VARYING_SLOT_VIEWPORT:
 965             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 966             input_semantic_index[slot] = 0;
 967             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 968             break;
 969          case VARYING_SLOT_CLIP_DIST0:
 970             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 971             input_semantic_index[slot] = 0;
 972             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 973             break;
 974          case VARYING_SLOT_CLIP_DIST1:
 975             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 976             input_semantic_index[slot] = 1;
 977             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 978             break;
 979          case VARYING_SLOT_CULL_DIST0:
 980          case VARYING_SLOT_CULL_DIST1:
 981             /* these should have been lowered by GLSL */
 982             assert(0);
 983             break;
 984             /* In most cases, there is nothing special about these
 985              * inputs, so adopt a convention to use the generic
 986              * semantic name and the mesa VARYING_SLOT_ number as the
 987              * index.
 988              *
 989              * All that is required is that the vertex shader labels
 990              * its own outputs similarly, and that the vertex shader
 991              * generates at least every output required by the
 992              * fragment shader plus fixed-function hardware (such as
 993              * BFC).
 994              *
 995              * However, some drivers may need us to identify the PNTC and TEXi
 996              * varyings if, for example, their capability to replace them with
 997              * sprite coordinates is limited.
 998              */
 999          case VARYING_SLOT_PNTC:
1000             if (st->needs_texcoord_semantic) {
1001                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
1002                input_semantic_index[slot] = 0;
1003                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1004                break;
1005             }
1006             /* fall through */
1007          case VARYING_SLOT_TEX0:
1008          case VARYING_SLOT_TEX1:
1009          case VARYING_SLOT_TEX2:
1010          case VARYING_SLOT_TEX3:
1011          case VARYING_SLOT_TEX4:
1012          case VARYING_SLOT_TEX5:
1013          case VARYING_SLOT_TEX6:
1014          case VARYING_SLOT_TEX7:
1015             if (st->needs_texcoord_semantic) {
1016                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
1017                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
1018                interpMode[slot] = stfp->glsl_to_tgsi ?
1019                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1020                break;
1021             }
1022             /* fall through */
1023          case VARYING_SLOT_VAR0:
1024          default:
1025             /* Semantic indices should be zero-based because drivers may choose
1026              * to assign a fixed slot determined by that index.
1027              * This is useful because ARB_separate_shader_objects uses location
1028              * qualifiers for linkage, and if the semantic index corresponds to
1029              * these locations, linkage passes in the driver become unecessary.
1030              *
1031              * If needs_texcoord_semantic is true, no semantic indices will be
1032              * consumed for the TEXi varyings, and we can base the locations of
1033              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
1034              */
1035             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
1036                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
1037             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
1038             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
1039             if (attr == VARYING_SLOT_PNTC)
1040                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1041             else {
1042                interpMode[slot] = stfp->glsl_to_tgsi ?
1043                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1044             }
1045             break;
1046          }
1047       }
1048       else {
1049          inputMapping[attr] = -1;
1050       }
1051    }
1052
1053    /*
1054     * Semantics and mapping for outputs
1055     */
1056    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
1057
1058    /* if z is written, emit that first */
1059    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
1060       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
1061       fs_output_semantic_index[fs_num_outputs] = 0;
1062       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
1063       fs_num_outputs++;
1064       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
1065    }
1066
1067    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
1068       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
1069       fs_output_semantic_index[fs_num_outputs] = 0;
1070       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
1071       fs_num_outputs++;
1072       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
1073    }
1074
1075    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
1076       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
1077       fs_output_semantic_index[fs_num_outputs] = 0;
1078       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
1079       fs_num_outputs++;
1080       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
1081    }
1082
1083    /* handle remaining outputs (color) */
1084    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
1085       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
1086          stfp->Base.SecondaryOutputsWritten;
1087       const unsigned loc = attr % FRAG_RESULT_MAX;
1088
1089       if (written & BITFIELD64_BIT(loc)) {
1090          switch (loc) {
1091          case FRAG_RESULT_DEPTH:
1092          case FRAG_RESULT_STENCIL:
1093          case FRAG_RESULT_SAMPLE_MASK:
1094             /* handled above */
1095             assert(0);
1096             break;
1097          case FRAG_RESULT_COLOR:
1098             write_all = GL_TRUE; /* fallthrough */
1099          default: {
1100             int index;
1101             assert(loc == FRAG_RESULT_COLOR ||
1102                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1103
1104             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1105
1106             if (attr >= FRAG_RESULT_MAX) {
1107                /* Secondary color for dual source blending. */
1108                assert(index == 0);
1109                index++;
1110             }
1111
1112             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1113             fs_output_semantic_index[fs_num_outputs] = index;
1114             outputMapping[attr] = fs_num_outputs;
1115             break;
1116          }
1117          }
1118
1119          fs_num_outputs++;
1120       }
1121    }
1122
1123    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1124    if (ureg == NULL)
1125       return false;
1126
1127    if (ST_DEBUG & DEBUG_MESA) {
1128       _mesa_print_program(&stfp->Base);
1129       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1130       debug_printf("\n");
1131    }
1132    if (write_all == GL_TRUE)
1133       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1134
1135    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1136       switch (stfp->Base.info.fs.depth_layout) {
1137       case FRAG_DEPTH_LAYOUT_ANY:
1138          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1139                        TGSI_FS_DEPTH_LAYOUT_ANY);
1140          break;
1141       case FRAG_DEPTH_LAYOUT_GREATER:
1142          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1143                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1144          break;
1145       case FRAG_DEPTH_LAYOUT_LESS:
1146          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1147                        TGSI_FS_DEPTH_LAYOUT_LESS);
1148          break;
1149       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1150          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1151                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1152          break;
1153       default:
1154          assert(0);
1155       }
1156    }
1157
1158    if (stfp->glsl_to_tgsi) {
1159       st_translate_program(st->ctx,
1160                            PIPE_SHADER_FRAGMENT,
1161                            ureg,
1162                            stfp->glsl_to_tgsi,
1163                            &stfp->Base,
1164                            /* inputs */
1165                            fs_num_inputs,
1166                            inputMapping,
1167                            inputSlotToAttr,
1168                            input_semantic_name,
1169                            input_semantic_index,
1170                            interpMode,
1171                            /* outputs */
1172                            fs_num_outputs,
1173                            outputMapping,
1174                            fs_output_semantic_name,
1175                            fs_output_semantic_index);
1176
1177       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1178    } else if (stfp->ati_fs)
1179       st_translate_atifs_program(ureg,
1180                                  stfp->ati_fs,
1181                                  &stfp->Base,
1182                                  /* inputs */
1183                                  fs_num_inputs,
1184                                  inputMapping,
1185                                  input_semantic_name,
1186                                  input_semantic_index,
1187                                  interpMode,
1188                                  /* outputs */
1189                                  fs_num_outputs,
1190                                  outputMapping,
1191                                  fs_output_semantic_name,
1192                                  fs_output_semantic_index);
1193    else
1194       st_translate_mesa_program(st->ctx,
1195                                 PIPE_SHADER_FRAGMENT,
1196                                 ureg,
1197                                 &stfp->Base,
1198                                 /* inputs */
1199                                 fs_num_inputs,
1200                                 inputMapping,
1201                                 input_semantic_name,
1202                                 input_semantic_index,
1203                                 interpMode,
1204                                 /* outputs */
1205                                 fs_num_outputs,
1206                                 outputMapping,
1207                                 fs_output_semantic_name,
1208                                 fs_output_semantic_index);
1209
1210    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1211    ureg_destroy(ureg);
1212
1213    if (stfp->glsl_to_tgsi) {
1214       stfp->glsl_to_tgsi = NULL;
1215       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1216    }
1217
1218    return stfp->state.tokens != NULL;
1219 }
1220
1221 static struct st_fp_variant *
1222 st_create_fp_variant(struct st_context *st,
1223                      struct st_common_program *stfp,
1224                      const struct st_fp_variant_key *key)
1225 {
1226    struct pipe_context *pipe = st->pipe;
1227    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1228    struct pipe_shader_state state = {0};
1229    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1230    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1231       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1232    static const gl_state_index16 scale_state[STATE_LENGTH] =
1233       { STATE_INTERNAL, STATE_PT_SCALE };
1234    static const gl_state_index16 bias_state[STATE_LENGTH] =
1235       { STATE_INTERNAL, STATE_PT_BIAS };
1236    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1237       { STATE_INTERNAL, STATE_ALPHA_REF };
1238
1239    if (!variant)
1240       return NULL;
1241
1242    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1243       bool finalize = false;
1244
1245       state.type = PIPE_SHADER_IR_NIR;
1246       state.ir.nir = nir_shader_clone(NULL, stfp->state.ir.nir);
1247
1248       if (key->clamp_color) {
1249          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1250          finalize = true;
1251       }
1252
1253       if (key->lower_flatshade) {
1254          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1255          finalize = true;
1256       }
1257
1258       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1259          _mesa_add_state_reference(params, alpha_ref_state);
1260          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1261                     false, alpha_ref_state);
1262          finalize = true;
1263       }
1264
1265       if (key->lower_two_sided_color) {
1266          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
1267          finalize = true;
1268       }
1269
1270       if (key->persample_shading) {
1271           nir_shader *shader = state.ir.nir;
1272           nir_foreach_variable(var, &shader->inputs)
1273              var->data.sample = true;
1274           finalize = true;
1275       }
1276
1277       assert(!(key->bitmap && key->drawpixels));
1278
1279       /* glBitmap */
1280       if (key->bitmap) {
1281          nir_lower_bitmap_options options = {0};
1282
1283          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1284          options.sampler = variant->bitmap_sampler;
1285          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1286
1287          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1288          finalize = true;
1289       }
1290
1291       /* glDrawPixels (color only) */
1292       if (key->drawpixels) {
1293          nir_lower_drawpixels_options options = {{0}};
1294          unsigned samplers_used = stfp->Base.SamplersUsed;
1295
1296          /* Find the first unused slot. */
1297          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1298          options.drawpix_sampler = variant->drawpix_sampler;
1299          samplers_used |= (1 << variant->drawpix_sampler);
1300
1301          options.pixel_maps = key->pixelMaps;
1302          if (key->pixelMaps) {
1303             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1304             options.pixelmap_sampler = variant->pixelmap_sampler;
1305          }
1306
1307          options.scale_and_bias = key->scaleAndBias;
1308          if (key->scaleAndBias) {
1309             _mesa_add_state_reference(params, scale_state);
1310             memcpy(options.scale_state_tokens, scale_state,
1311                    sizeof(options.scale_state_tokens));
1312             _mesa_add_state_reference(params, bias_state);
1313             memcpy(options.bias_state_tokens, bias_state,
1314                    sizeof(options.bias_state_tokens));
1315          }
1316
1317          _mesa_add_state_reference(params, texcoord_state);
1318          memcpy(options.texcoord_state_tokens, texcoord_state,
1319                 sizeof(options.texcoord_state_tokens));
1320
1321          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1322          finalize = true;
1323       }
1324
1325       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1326                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1327                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1328          nir_lower_tex_options options = {0};
1329          options.lower_y_uv_external = key->external.lower_nv12;
1330          options.lower_y_u_v_external = key->external.lower_iyuv;
1331          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1332          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1333          options.lower_ayuv_external = key->external.lower_ayuv;
1334          options.lower_xyuv_external = key->external.lower_xyuv;
1335          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1336          finalize = true;
1337       }
1338
1339       if (finalize || !st->allow_st_finalize_nir_twice) {
1340          st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
1341                          false);
1342       }
1343
1344       /* This pass needs to happen *after* nir_lower_sampler */
1345       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1346                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1347          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1348                     ~stfp->Base.SamplersUsed,
1349                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1350                        key->external.lower_yx_xuxv,
1351                     key->external.lower_iyuv);
1352          finalize = true;
1353       }
1354
1355       if (finalize || !st->allow_st_finalize_nir_twice) {
1356          /* Some of the lowering above may have introduced new varyings */
1357          nir_shader_gather_info(state.ir.nir,
1358                                 nir_shader_get_entrypoint(state.ir.nir));
1359
1360          struct pipe_screen *screen = pipe->screen;
1361          if (screen->finalize_nir)
1362             screen->finalize_nir(screen, state.ir.nir, false);
1363       }
1364
1365       if (ST_DEBUG & DEBUG_PRINT_IR)
1366          nir_print_shader(state.ir.nir, stderr);
1367
1368       variant->driver_shader = pipe->create_fs_state(pipe, &state);
1369       variant->key = *key;
1370
1371       return variant;
1372    }
1373
1374    state.tokens = stfp->state.tokens;
1375
1376    assert(!(key->bitmap && key->drawpixels));
1377
1378    /* Fix texture targets and add fog for ATI_fs */
1379    if (stfp->ati_fs) {
1380       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1381
1382       if (tokens)
1383          state.tokens = tokens;
1384       else
1385          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1386    }
1387
1388    /* Emulate features. */
1389    if (key->clamp_color || key->persample_shading) {
1390       const struct tgsi_token *tokens;
1391       unsigned flags =
1392          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1393          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1394
1395       tokens = tgsi_emulate(state.tokens, flags);
1396
1397       if (tokens) {
1398          if (state.tokens != stfp->state.tokens)
1399             tgsi_free_tokens(state.tokens);
1400          state.tokens = tokens;
1401       } else
1402          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1403    }
1404
1405    /* glBitmap */
1406    if (key->bitmap) {
1407       const struct tgsi_token *tokens;
1408
1409       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1410
1411       tokens = st_get_bitmap_shader(state.tokens,
1412                                     st->internal_target,
1413                                     variant->bitmap_sampler,
1414                                     st->needs_texcoord_semantic,
1415                                     st->bitmap.tex_format ==
1416                                     PIPE_FORMAT_R8_UNORM);
1417
1418       if (tokens) {
1419          if (state.tokens != stfp->state.tokens)
1420             tgsi_free_tokens(state.tokens);
1421          state.tokens = tokens;
1422       } else
1423          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1424    }
1425
1426    /* glDrawPixels (color only) */
1427    if (key->drawpixels) {
1428       const struct tgsi_token *tokens;
1429       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1430
1431       /* Find the first unused slot. */
1432       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1433
1434       if (key->pixelMaps) {
1435          unsigned samplers_used = stfp->Base.SamplersUsed |
1436                                   (1 << variant->drawpix_sampler);
1437
1438          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1439       }
1440
1441       if (key->scaleAndBias) {
1442          scale_const = _mesa_add_state_reference(params, scale_state);
1443          bias_const = _mesa_add_state_reference(params, bias_state);
1444       }
1445
1446       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1447
1448       tokens = st_get_drawpix_shader(state.tokens,
1449                                      st->needs_texcoord_semantic,
1450                                      key->scaleAndBias, scale_const,
1451                                      bias_const, key->pixelMaps,
1452                                      variant->drawpix_sampler,
1453                                      variant->pixelmap_sampler,
1454                                      texcoord_const, st->internal_target);
1455
1456       if (tokens) {
1457          if (state.tokens != stfp->state.tokens)
1458             tgsi_free_tokens(state.tokens);
1459          state.tokens = tokens;
1460       } else
1461          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1462    }
1463
1464    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1465                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1466       const struct tgsi_token *tokens;
1467
1468       /* samplers inserted would conflict, but this should be unpossible: */
1469       assert(!(key->bitmap || key->drawpixels));
1470
1471       tokens = st_tgsi_lower_yuv(state.tokens,
1472                                  ~stfp->Base.SamplersUsed,
1473                                  key->external.lower_nv12 ||
1474                                     key->external.lower_xy_uxvx ||
1475                                     key->external.lower_yx_xuxv,
1476                                  key->external.lower_iyuv);
1477       if (tokens) {
1478          if (state.tokens != stfp->state.tokens)
1479             tgsi_free_tokens(state.tokens);
1480          state.tokens = tokens;
1481       } else {
1482          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1483       }
1484    }
1485
1486    if (key->lower_depth_clamp) {
1487       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1488
1489       const struct tgsi_token *tokens;
1490       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1491       if (state.tokens != stfp->state.tokens)
1492          tgsi_free_tokens(state.tokens);
1493       state.tokens = tokens;
1494    }
1495
1496    if (ST_DEBUG & DEBUG_PRINT_IR)
1497       tgsi_dump(state.tokens, 0);
1498
1499    /* fill in variant */
1500    variant->driver_shader = pipe->create_fs_state(pipe, &state);
1501    variant->key = *key;
1502
1503    if (state.tokens != stfp->state.tokens)
1504       tgsi_free_tokens(state.tokens);
1505    return variant;
1506 }
1507
1508 /**
1509  * Translate fragment program if needed.
1510  */
1511 struct st_fp_variant *
1512 st_get_fp_variant(struct st_context *st,
1513                   struct st_common_program *stfp,
1514                   const struct st_fp_variant_key *key)
1515 {
1516    struct st_fp_variant *fpv;
1517
1518    /* Search for existing variant */
1519    for (fpv = stfp->fp_variants; fpv; fpv = fpv->next) {
1520       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1521          break;
1522       }
1523    }
1524
1525    if (!fpv) {
1526       /* create new */
1527       fpv = st_create_fp_variant(st, stfp, key);
1528       if (fpv) {
1529          if (key->bitmap || key->drawpixels) {
1530             /* Regular variants should always come before the
1531              * bitmap & drawpixels variants, (unless there
1532              * are no regular variants) so that
1533              * st_update_fp can take a fast path when
1534              * shader_has_one_variant is set.
1535              */
1536             if (!stfp->fp_variants) {
1537                stfp->fp_variants = fpv;
1538             } else {
1539                /* insert into list after the first one */
1540                fpv->next = stfp->fp_variants->next;
1541                stfp->fp_variants->next = fpv;
1542             }
1543          } else {
1544             /* insert into list */
1545             fpv->next = stfp->fp_variants;
1546             stfp->fp_variants = fpv;
1547          }
1548       }
1549    }
1550
1551    return fpv;
1552 }
1553
1554 /**
1555  * Translate a program. This is common code for geometry and tessellation
1556  * shaders.
1557  */
1558 bool
1559 st_translate_common_program(struct st_context *st,
1560                             struct st_common_program *stcp)
1561 {
1562    struct gl_program *prog = &stcp->Base;
1563    enum pipe_shader_type stage =
1564       pipe_shader_type_from_mesa(stcp->Base.info.stage);
1565    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1566
1567    if (ureg == NULL)
1568       return false;
1569
1570    switch (stage) {
1571    case PIPE_SHADER_TESS_CTRL:
1572       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1573                     stcp->Base.info.tess.tcs_vertices_out);
1574       break;
1575
1576    case PIPE_SHADER_TESS_EVAL:
1577       if (stcp->Base.info.tess.primitive_mode == GL_ISOLINES)
1578          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1579       else
1580          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1581                        stcp->Base.info.tess.primitive_mode);
1582
1583       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1584       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1585                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1586       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1587                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1588
1589       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1590                     (stcp->Base.info.tess.spacing + 1) % 3);
1591
1592       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1593                     !stcp->Base.info.tess.ccw);
1594       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1595                     stcp->Base.info.tess.point_mode);
1596       break;
1597
1598    case PIPE_SHADER_GEOMETRY:
1599       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1600                     stcp->Base.info.gs.input_primitive);
1601       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1602                     stcp->Base.info.gs.output_primitive);
1603       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1604                     stcp->Base.info.gs.vertices_out);
1605       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1606                     stcp->Base.info.gs.invocations);
1607       break;
1608
1609    default:
1610       break;
1611    }
1612
1613    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1614    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1615    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1616    GLuint attr;
1617
1618    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1619    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1620    uint num_inputs = 0;
1621
1622    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1623    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1624    uint num_outputs = 0;
1625
1626    GLint i;
1627
1628    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1629    memset(inputMapping, 0, sizeof(inputMapping));
1630    memset(outputMapping, 0, sizeof(outputMapping));
1631    memset(&stcp->state, 0, sizeof(stcp->state));
1632
1633    if (prog->info.clip_distance_array_size)
1634       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1635                     prog->info.clip_distance_array_size);
1636    if (prog->info.cull_distance_array_size)
1637       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1638                     prog->info.cull_distance_array_size);
1639
1640    /*
1641     * Convert Mesa program inputs to TGSI input register semantics.
1642     */
1643    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1644       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1645          continue;
1646
1647       unsigned slot = num_inputs++;
1648
1649       inputMapping[attr] = slot;
1650       inputSlotToAttr[slot] = attr;
1651
1652       unsigned semantic_name, semantic_index;
1653       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1654                                    &semantic_name, &semantic_index);
1655       input_semantic_name[slot] = semantic_name;
1656       input_semantic_index[slot] = semantic_index;
1657    }
1658
1659    /* Also add patch inputs. */
1660    for (attr = 0; attr < 32; attr++) {
1661       if (prog->info.patch_inputs_read & (1u << attr)) {
1662          GLuint slot = num_inputs++;
1663          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1664
1665          inputMapping[patch_attr] = slot;
1666          inputSlotToAttr[slot] = patch_attr;
1667          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1668          input_semantic_index[slot] = attr;
1669       }
1670    }
1671
1672    /* initialize output semantics to defaults */
1673    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1674       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1675       output_semantic_index[i] = 0;
1676    }
1677
1678    /*
1679     * Determine number of outputs, the (default) output register
1680     * mapping and the semantic information for each output.
1681     */
1682    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1683       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1684          GLuint slot = num_outputs++;
1685
1686          outputMapping[attr] = slot;
1687
1688          unsigned semantic_name, semantic_index;
1689          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1690                                       &semantic_name, &semantic_index);
1691          output_semantic_name[slot] = semantic_name;
1692          output_semantic_index[slot] = semantic_index;
1693       }
1694    }
1695
1696    /* Also add patch outputs. */
1697    for (attr = 0; attr < 32; attr++) {
1698       if (prog->info.patch_outputs_written & (1u << attr)) {
1699          GLuint slot = num_outputs++;
1700          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1701
1702          outputMapping[patch_attr] = slot;
1703          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1704          output_semantic_index[slot] = attr;
1705       }
1706    }
1707
1708    st_translate_program(st->ctx,
1709                         stage,
1710                         ureg,
1711                         stcp->glsl_to_tgsi,
1712                         prog,
1713                         /* inputs */
1714                         num_inputs,
1715                         inputMapping,
1716                         inputSlotToAttr,
1717                         input_semantic_name,
1718                         input_semantic_index,
1719                         NULL,
1720                         /* outputs */
1721                         num_outputs,
1722                         outputMapping,
1723                         output_semantic_name,
1724                         output_semantic_index);
1725
1726    stcp->state.tokens = ureg_get_tokens(ureg, NULL);
1727
1728    ureg_destroy(ureg);
1729
1730    st_translate_stream_output_info(prog);
1731
1732    st_store_ir_in_disk_cache(st, prog, false);
1733
1734    if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1735       _mesa_print_program(prog);
1736
1737    free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi);
1738    stcp->glsl_to_tgsi = NULL;
1739    return true;
1740 }
1741
1742
1743 /**
1744  * Get/create a basic program variant.
1745  */
1746 struct st_common_variant *
1747 st_get_common_variant(struct st_context *st,
1748                       struct st_common_program *prog,
1749                       const struct st_common_variant_key *key)
1750 {
1751    struct pipe_context *pipe = st->pipe;
1752    struct st_common_variant *v;
1753    struct pipe_shader_state state = {0};
1754
1755    /* Search for existing variant */
1756    for (v = prog->variants; v; v = v->next) {
1757       if (memcmp(&v->key, key, sizeof(*key)) == 0) {
1758          break;
1759       }
1760    }
1761
1762    if (!v) {
1763       /* create new */
1764       v = CALLOC_STRUCT(st_common_variant);
1765       if (v) {
1766          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1767             bool finalize = false;
1768
1769             state.type = PIPE_SHADER_IR_NIR;
1770             state.ir.nir = nir_shader_clone(NULL, prog->state.ir.nir);
1771
1772             if (key->clamp_color) {
1773                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1774                finalize = true;
1775             }
1776
1777             state.stream_output = prog->state.stream_output;
1778
1779             if (finalize || !st->allow_st_finalize_nir_twice) {
1780                st_finalize_nir(st, &prog->Base, prog->shader_program,
1781                                state.ir.nir, true);
1782             }
1783
1784             if (ST_DEBUG & DEBUG_PRINT_IR)
1785                nir_print_shader(state.ir.nir, stderr);
1786          } else {
1787             if (key->lower_depth_clamp) {
1788                struct gl_program_parameter_list *params = prog->Base.Parameters;
1789
1790                unsigned depth_range_const =
1791                      _mesa_add_state_reference(params, depth_range_state);
1792
1793                const struct tgsi_token *tokens;
1794                tokens =
1795                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1796                                                depth_range_const,
1797                                                key->clip_negative_one_to_one);
1798
1799                if (tokens != prog->state.tokens)
1800                   tgsi_free_tokens(prog->state.tokens);
1801
1802                prog->state.tokens = tokens;
1803             }
1804             state = prog->state;
1805
1806             if (ST_DEBUG & DEBUG_PRINT_IR)
1807                tgsi_dump(state.tokens, 0);
1808          }
1809          /* fill in new variant */
1810          switch (prog->Base.info.stage) {
1811          case MESA_SHADER_TESS_CTRL:
1812             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1813             break;
1814          case MESA_SHADER_TESS_EVAL:
1815             v->driver_shader = pipe->create_tes_state(pipe, &state);
1816             break;
1817          case MESA_SHADER_GEOMETRY:
1818             v->driver_shader = pipe->create_gs_state(pipe, &state);
1819             break;
1820          case MESA_SHADER_COMPUTE: {
1821             struct pipe_compute_state cs = {0};
1822             cs.ir_type = state.type;
1823             cs.req_local_mem = prog->Base.info.cs.shared_size;
1824
1825             if (state.type == PIPE_SHADER_IR_NIR)
1826                cs.prog = state.ir.nir;
1827             else
1828                cs.prog = state.tokens;
1829
1830             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1831             break;
1832          }
1833          default:
1834             assert(!"unhandled shader type");
1835             free(v);
1836             return NULL;
1837          }
1838
1839          v->key = *key;
1840
1841          /* insert into list */
1842          v->next = prog->variants;
1843          prog->variants = v;
1844       }
1845    }
1846
1847    return v;
1848 }
1849
1850
1851 /**
1852  * Vert/Geom/Frag programs have per-context variants.  Free all the
1853  * variants attached to the given program which match the given context.
1854  */
1855 static void
1856 destroy_program_variants(struct st_context *st, struct gl_program *target)
1857 {
1858    if (!target || target == &_mesa_DummyProgram)
1859       return;
1860
1861    switch (target->Target) {
1862    case GL_VERTEX_PROGRAM_ARB:
1863       {
1864          struct st_vertex_program *stvp = (struct st_vertex_program *) target;
1865          struct st_vp_variant *vpv, **prevPtr = &stvp->variants;
1866
1867          for (vpv = stvp->variants; vpv; ) {
1868             struct st_vp_variant *next = vpv->next;
1869             if (vpv->key.st == st) {
1870                /* unlink from list */
1871                *prevPtr = next;
1872                /* destroy this variant */
1873                delete_vp_variant(st, vpv);
1874             }
1875             else {
1876                prevPtr = &vpv->next;
1877             }
1878             vpv = next;
1879          }
1880       }
1881       break;
1882    case GL_FRAGMENT_PROGRAM_ARB:
1883       {
1884          struct st_common_program *stfp =
1885             (struct st_common_program *) target;
1886          struct st_fp_variant *fpv, **prevPtr = &stfp->fp_variants;
1887
1888          for (fpv = stfp->fp_variants; fpv; ) {
1889             struct st_fp_variant *next = fpv->next;
1890             if (fpv->key.st == st) {
1891                /* unlink from list */
1892                *prevPtr = next;
1893                /* destroy this variant */
1894                delete_fp_variant(st, fpv);
1895             }
1896             else {
1897                prevPtr = &fpv->next;
1898             }
1899             fpv = next;
1900          }
1901       }
1902       break;
1903    case GL_GEOMETRY_PROGRAM_NV:
1904    case GL_TESS_CONTROL_PROGRAM_NV:
1905    case GL_TESS_EVALUATION_PROGRAM_NV:
1906    case GL_COMPUTE_PROGRAM_NV:
1907       {
1908          struct st_common_program *p = st_common_program(target);
1909          struct st_common_variant *v, **prevPtr = &p->variants;
1910
1911          for (v = p->variants; v; ) {
1912             struct st_common_variant *next = v->next;
1913             if (v->key.st == st) {
1914                /* unlink from list */
1915                *prevPtr = next;
1916                /* destroy this variant */
1917                delete_common_variant(st, v, target->Target);
1918             }
1919             else {
1920                prevPtr = &v->next;
1921             }
1922             v = next;
1923          }
1924       }
1925       break;
1926    default:
1927       _mesa_problem(NULL, "Unexpected program target 0x%x in "
1928                     "destroy_program_variants_cb()", target->Target);
1929    }
1930 }
1931
1932
1933 /**
1934  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1935  * which match the given context.
1936  */
1937 static void
1938 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1939 {
1940    struct st_context *st = (struct st_context *) userData;
1941    struct gl_shader *shader = (struct gl_shader *) data;
1942
1943    switch (shader->Type) {
1944    case GL_SHADER_PROGRAM_MESA:
1945       {
1946          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1947          GLuint i;
1948
1949          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1950             if (shProg->_LinkedShaders[i])
1951                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1952          }
1953       }
1954       break;
1955    case GL_VERTEX_SHADER:
1956    case GL_FRAGMENT_SHADER:
1957    case GL_GEOMETRY_SHADER:
1958    case GL_TESS_CONTROL_SHADER:
1959    case GL_TESS_EVALUATION_SHADER:
1960    case GL_COMPUTE_SHADER:
1961       break;
1962    default:
1963       assert(0);
1964    }
1965 }
1966
1967
1968 /**
1969  * Callback for _mesa_HashWalk.  Free all the program variants which match
1970  * the given context.
1971  */
1972 static void
1973 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1974 {
1975    struct st_context *st = (struct st_context *) userData;
1976    struct gl_program *program = (struct gl_program *) data;
1977    destroy_program_variants(st, program);
1978 }
1979
1980
1981 /**
1982  * Walk over all shaders and programs to delete any variants which
1983  * belong to the given context.
1984  * This is called during context tear-down.
1985  */
1986 void
1987 st_destroy_program_variants(struct st_context *st)
1988 {
1989    /* If shaders can be shared with other contexts, the last context will
1990     * call DeleteProgram on all shaders, releasing everything.
1991     */
1992    if (st->has_shareable_shaders)
1993       return;
1994
1995    /* ARB vert/frag program */
1996    _mesa_HashWalk(st->ctx->Shared->Programs,
1997                   destroy_program_variants_cb, st);
1998
1999    /* GLSL vert/frag/geom shaders */
2000    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
2001                   destroy_shader_program_variants_cb, st);
2002 }
2003
2004
2005 /**
2006  * Compile one shader variant.
2007  */
2008 void
2009 st_precompile_shader_variant(struct st_context *st,
2010                              struct gl_program *prog)
2011 {
2012    switch (prog->Target) {
2013    case GL_VERTEX_PROGRAM_ARB: {
2014       struct st_vertex_program *p = (struct st_vertex_program *)prog;
2015       struct st_common_variant_key key;
2016
2017       memset(&key, 0, sizeof(key));
2018
2019       key.st = st->has_shareable_shaders ? NULL : st;
2020       st_get_vp_variant(st, p, &key);
2021       break;
2022    }
2023
2024    case GL_FRAGMENT_PROGRAM_ARB: {
2025       struct st_common_program *p = (struct st_common_program *)prog;
2026       struct st_fp_variant_key key;
2027
2028       memset(&key, 0, sizeof(key));
2029
2030       key.st = st->has_shareable_shaders ? NULL : st;
2031       st_get_fp_variant(st, p, &key);
2032       break;
2033    }
2034
2035    case GL_TESS_CONTROL_PROGRAM_NV:
2036    case GL_TESS_EVALUATION_PROGRAM_NV:
2037    case GL_GEOMETRY_PROGRAM_NV:
2038    case GL_COMPUTE_PROGRAM_NV: {
2039       struct st_common_program *p = st_common_program(prog);
2040       struct st_common_variant_key key;
2041
2042       memset(&key, 0, sizeof(key));
2043
2044       key.st = st->has_shareable_shaders ? NULL : st;
2045       st_get_common_variant(st, p, &key);
2046       break;
2047    }
2048
2049    default:
2050       assert(0);
2051    }
2052 }