src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35 #include "main/imports.h"
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44
  45 #include "pipe/p_context.h"
  46 #include "pipe/p_defines.h"
  47 #include "pipe/p_shader_tokens.h"
  48 #include "draw/draw_context.h"
  49 #include "tgsi/tgsi_dump.h"
  50 #include "tgsi/tgsi_emulate.h"
  51 #include "tgsi/tgsi_parse.h"
  52 #include "tgsi/tgsi_ureg.h"
  53
  54 #include "st_debug.h"
  55 #include "st_cb_bitmap.h"
  56 #include "st_cb_drawpixels.h"
  57 #include "st_context.h"
  58 #include "st_tgsi_lower_depth_clamp.h"
  59 #include "st_tgsi_lower_yuv.h"
  60 #include "st_program.h"
  61 #include "st_mesa_to_tgsi.h"
  62 #include "st_atifs_to_tgsi.h"
  63 #include "st_nir.h"
  64 #include "st_shader_cache.h"
  65 #include "cso_cache/cso_context.h"
  66
  67
  68
  69 static void
  70 set_affected_state_flags(uint64_t *states,
  71                          struct gl_program *prog,
  72                          uint64_t new_constants,
  73                          uint64_t new_sampler_views,
  74                          uint64_t new_samplers,
  75                          uint64_t new_images,
  76                          uint64_t new_ubos,
  77                          uint64_t new_ssbos,
  78                          uint64_t new_atomics)
  79 {
  80    if (prog->Parameters->NumParameters)
  81       *states |= new_constants;
  82
  83    if (prog->info.num_textures)
  84       *states |= new_sampler_views | new_samplers;
  85
  86    if (prog->info.num_images)
  87       *states |= new_images;
  88
  89    if (prog->info.num_ubos)
  90       *states |= new_ubos;
  91
  92    if (prog->info.num_ssbos)
  93       *states |= new_ssbos;
  94
  95    if (prog->info.num_abos)
  96       *states |= new_atomics;
  97 }
  98
  99 /**
 100  * This determines which states will be updated when the shader is bound.
 101  */
 102 void
 103 st_set_prog_affected_state_flags(struct gl_program *prog)
 104 {
 105    uint64_t *states;
 106
 107    switch (prog->info.stage) {
 108    case MESA_SHADER_VERTEX:
 109       states = &((struct st_vertex_program*)prog)->affected_states;
 110
 111       *states = ST_NEW_VS_STATE |
 112                 ST_NEW_RASTERIZER |
 113                 ST_NEW_VERTEX_ARRAYS;
 114
 115       set_affected_state_flags(states, prog,
 116                                ST_NEW_VS_CONSTANTS,
 117                                ST_NEW_VS_SAMPLER_VIEWS,
 118                                ST_NEW_VS_SAMPLERS,
 119                                ST_NEW_VS_IMAGES,
 120                                ST_NEW_VS_UBOS,
 121                                ST_NEW_VS_SSBOS,
 122                                ST_NEW_VS_ATOMICS);
 123       break;
 124
 125    case MESA_SHADER_TESS_CTRL:
 126       states = &(st_common_program(prog))->affected_states;
 127
 128       *states = ST_NEW_TCS_STATE;
 129
 130       set_affected_state_flags(states, prog,
 131                                ST_NEW_TCS_CONSTANTS,
 132                                ST_NEW_TCS_SAMPLER_VIEWS,
 133                                ST_NEW_TCS_SAMPLERS,
 134                                ST_NEW_TCS_IMAGES,
 135                                ST_NEW_TCS_UBOS,
 136                                ST_NEW_TCS_SSBOS,
 137                                ST_NEW_TCS_ATOMICS);
 138       break;
 139
 140    case MESA_SHADER_TESS_EVAL:
 141       states = &(st_common_program(prog))->affected_states;
 142
 143       *states = ST_NEW_TES_STATE |
 144                 ST_NEW_RASTERIZER;
 145
 146       set_affected_state_flags(states, prog,
 147                                ST_NEW_TES_CONSTANTS,
 148                                ST_NEW_TES_SAMPLER_VIEWS,
 149                                ST_NEW_TES_SAMPLERS,
 150                                ST_NEW_TES_IMAGES,
 151                                ST_NEW_TES_UBOS,
 152                                ST_NEW_TES_SSBOS,
 153                                ST_NEW_TES_ATOMICS);
 154       break;
 155
 156    case MESA_SHADER_GEOMETRY:
 157       states = &(st_common_program(prog))->affected_states;
 158
 159       *states = ST_NEW_GS_STATE |
 160                 ST_NEW_RASTERIZER;
 161
 162       set_affected_state_flags(states, prog,
 163                                ST_NEW_GS_CONSTANTS,
 164                                ST_NEW_GS_SAMPLER_VIEWS,
 165                                ST_NEW_GS_SAMPLERS,
 166                                ST_NEW_GS_IMAGES,
 167                                ST_NEW_GS_UBOS,
 168                                ST_NEW_GS_SSBOS,
 169                                ST_NEW_GS_ATOMICS);
 170       break;
 171
 172    case MESA_SHADER_FRAGMENT:
 173       states = &((struct st_common_program*)prog)->affected_states;
 174
 175       /* gl_FragCoord and glDrawPixels always use constants. */
 176       *states = ST_NEW_FS_STATE |
 177                 ST_NEW_SAMPLE_SHADING |
 178                 ST_NEW_FS_CONSTANTS;
 179
 180       set_affected_state_flags(states, prog,
 181                                ST_NEW_FS_CONSTANTS,
 182                                ST_NEW_FS_SAMPLER_VIEWS,
 183                                ST_NEW_FS_SAMPLERS,
 184                                ST_NEW_FS_IMAGES,
 185                                ST_NEW_FS_UBOS,
 186                                ST_NEW_FS_SSBOS,
 187                                ST_NEW_FS_ATOMICS);
 188       break;
 189
 190    case MESA_SHADER_COMPUTE:
 191       states = &((struct st_common_program*)prog)->affected_states;
 192
 193       *states = ST_NEW_CS_STATE;
 194
 195       set_affected_state_flags(states, prog,
 196                                ST_NEW_CS_CONSTANTS,
 197                                ST_NEW_CS_SAMPLER_VIEWS,
 198                                ST_NEW_CS_SAMPLERS,
 199                                ST_NEW_CS_IMAGES,
 200                                ST_NEW_CS_UBOS,
 201                                ST_NEW_CS_SSBOS,
 202                                ST_NEW_CS_ATOMICS);
 203       break;
 204
 205    default:
 206       unreachable("unhandled shader stage");
 207    }
 208 }
 209
 210 static void
 211 delete_ir(struct pipe_shader_state *ir)
 212 {
 213    if (ir->tokens) {
 214       ureg_free_tokens(ir->tokens);
 215       ir->tokens = NULL;
 216    }
 217
 218    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 219     * it has resulted in the driver taking ownership of the NIR.  Those
 220     * callers should be NULLing out the nir field in any pipe_shader_state
 221     * that might have this called in order to indicate that.
 222     *
 223     * GLSL IR and ARB programs will have set gl_program->nir to the same
 224     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 225     */
 226 }
 227
 228 /**
 229  * Delete a vertex program variant.  Note the caller must unlink
 230  * the variant from the linked list.
 231  */
 232 static void
 233 delete_vp_variant(struct st_context *st, struct st_vp_variant *vpv)
 234 {
 235    if (vpv->driver_shader) {
 236       if (st->has_shareable_shaders || vpv->key.st == st) {
 237          cso_delete_vertex_shader(st->cso_context, vpv->driver_shader);
 238       } else {
 239          st_save_zombie_shader(vpv->key.st, PIPE_SHADER_VERTEX,
 240                                vpv->driver_shader);
 241       }
 242    }
 243
 244    if (vpv->draw_shader)
 245       draw_delete_vertex_shader( st->draw, vpv->draw_shader );
 246
 247    if (vpv->tokens)
 248       ureg_free_tokens(vpv->tokens);
 249
 250    free( vpv );
 251 }
 252
 253
 254
 255 /**
 256  * Clean out any old compilations:
 257  */
 258 void
 259 st_release_vp_variants( struct st_context *st,
 260                         struct st_vertex_program *stvp )
 261 {
 262    struct st_vp_variant *vpv;
 263
 264    for (vpv = stvp->variants; vpv; ) {
 265       struct st_vp_variant *next = vpv->next;
 266       delete_vp_variant(st, vpv);
 267       vpv = next;
 268    }
 269
 270    stvp->variants = NULL;
 271
 272    delete_ir(&stvp->state);
 273 }
 274
 275
 276
 277 /**
 278  * Delete a fragment program variant.  Note the caller must unlink
 279  * the variant from the linked list.
 280  */
 281 static void
 282 delete_fp_variant(struct st_context *st, struct st_fp_variant *fpv)
 283 {
 284    if (fpv->driver_shader) {
 285       if (st->has_shareable_shaders || fpv->key.st == st) {
 286          cso_delete_fragment_shader(st->cso_context, fpv->driver_shader);
 287       } else {
 288          st_save_zombie_shader(fpv->key.st, PIPE_SHADER_FRAGMENT,
 289                                fpv->driver_shader);
 290       }
 291    }
 292
 293    free(fpv);
 294 }
 295
 296
 297 /**
 298  * Free all variants of a fragment program.
 299  */
 300 void
 301 st_release_fp_variants(struct st_context *st, struct st_common_program *stfp)
 302 {
 303    struct st_fp_variant *fpv;
 304
 305    for (fpv = stfp->fp_variants; fpv; ) {
 306       struct st_fp_variant *next = fpv->next;
 307       delete_fp_variant(st, fpv);
 308       fpv = next;
 309    }
 310
 311    stfp->fp_variants = NULL;
 312
 313    delete_ir(&stfp->state);
 314 }
 315
 316
 317 /**
 318  * Delete a basic program variant.  Note the caller must unlink
 319  * the variant from the linked list.
 320  */
 321 static void
 322 delete_basic_variant(struct st_context *st, struct st_common_variant *v,
 323                      GLenum target)
 324 {
 325    if (v->driver_shader) {
 326       if (st->has_shareable_shaders || v->key.st == st) {
 327          /* The shader's context matches the calling context, or we
 328           * don't care.
 329           */
 330          switch (target) {
 331          case GL_TESS_CONTROL_PROGRAM_NV:
 332             cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
 333             break;
 334          case GL_TESS_EVALUATION_PROGRAM_NV:
 335             cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
 336             break;
 337          case GL_GEOMETRY_PROGRAM_NV:
 338             cso_delete_geometry_shader(st->cso_context, v->driver_shader);
 339             break;
 340          case GL_COMPUTE_PROGRAM_NV:
 341             cso_delete_compute_shader(st->cso_context, v->driver_shader);
 342             break;
 343          default:
 344             unreachable("bad shader type in delete_basic_variant");
 345          }
 346       } else {
 347          /* We can't delete a shader with a context different from the one
 348           * that created it.  Add it to the creating context's zombie list.
 349           */
 350          enum pipe_shader_type type;
 351          switch (target) {
 352          case GL_TESS_CONTROL_PROGRAM_NV:
 353             type = PIPE_SHADER_TESS_CTRL;
 354             break;
 355          case GL_TESS_EVALUATION_PROGRAM_NV:
 356             type = PIPE_SHADER_TESS_EVAL;
 357             break;
 358          case GL_GEOMETRY_PROGRAM_NV:
 359             type = PIPE_SHADER_GEOMETRY;
 360             break;
 361          default:
 362             unreachable("");
 363          }
 364          st_save_zombie_shader(v->key.st, type, v->driver_shader);
 365       }
 366    }
 367
 368    free(v);
 369 }
 370
 371
 372 /**
 373  * Free all basic program variants.
 374  */
 375 void
 376 st_release_common_variants(struct st_context *st, struct st_common_program *p)
 377 {
 378    struct st_common_variant *v;
 379
 380    for (v = p->variants; v; ) {
 381       struct st_common_variant *next = v->next;
 382       delete_basic_variant(st, v, p->Base.Target);
 383       v = next;
 384    }
 385
 386    p->variants = NULL;
 387    delete_ir(&p->state);
 388 }
 389
 390
 391 /**
 392  * Translate ARB (asm) program to NIR
 393  */
 394 static nir_shader *
 395 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 396                          gl_shader_stage stage)
 397 {
 398    const struct gl_shader_compiler_options *options =
 399       &st->ctx->Const.ShaderCompilerOptions[stage];
 400
 401    /* Translate to NIR */
 402    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 403    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 404    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 405
 406    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, st->pipe->screen);
 407    NIR_PASS_V(nir, nir_lower_system_values);
 408
 409    /* Optimise NIR */
 410    NIR_PASS_V(nir, nir_opt_constant_folding);
 411    st_nir_opts(nir);
 412    nir_validate_shader(nir, "after st/ptn NIR opts");
 413
 414    return nir;
 415 }
 416
 417 void
 418 st_prepare_vertex_program(struct st_vertex_program *stvp)
 419 {
 420    stvp->num_inputs = 0;
 421    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 422    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 423
 424    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 425     * and TGSI generic input indexes, plus input attrib semantic info.
 426     */
 427    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 428       if ((stvp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 429          stvp->input_to_index[attr] = stvp->num_inputs;
 430          stvp->index_to_input[stvp->num_inputs] = attr;
 431          stvp->num_inputs++;
 432
 433          if ((stvp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 434             /* add placeholder for second part of a double attribute */
 435             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 436             stvp->num_inputs++;
 437          }
 438       }
 439    }
 440    /* pre-setup potentially unused edgeflag input */
 441    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 442    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 443
 444    /* Compute mapping of vertex program outputs to slots. */
 445    unsigned num_outputs = 0;
 446    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 447       if (stvp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 448          stvp->result_to_output[attr] = num_outputs++;
 449    }
 450    /* pre-setup potentially unused edgeflag output */
 451    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 452 }
 453
 454 void
 455 st_translate_stream_output_info(struct gl_program *prog)
 456 {
 457    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 458    if (!info)
 459       return;
 460
 461    /* Determine the (default) output register mapping for each output. */
 462    unsigned num_outputs = 0;
 463    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 464    memset(output_mapping, 0, sizeof(output_mapping));
 465
 466    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 467       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 468          output_mapping[attr] = num_outputs++;
 469    }
 470
 471    /* Translate stream output info. */
 472    struct pipe_stream_output_info *so_info = NULL;
 473    if (prog->info.stage == MESA_SHADER_VERTEX)
 474       so_info = &((struct st_vertex_program*)prog)->state.stream_output;
 475    else
 476       so_info = &((struct st_common_program*)prog)->state.stream_output;
 477
 478    for (unsigned i = 0; i < info->NumOutputs; i++) {
 479       so_info->output[i].register_index =
 480          output_mapping[info->Outputs[i].OutputRegister];
 481       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 482       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 483       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 484       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 485       so_info->output[i].stream = info->Outputs[i].StreamId;
 486    }
 487
 488    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 489       so_info->stride[i] = info->Buffers[i].Stride;
 490    }
 491    so_info->num_outputs = info->NumOutputs;
 492 }
 493
 494 /**
 495  * Translate a vertex program.
 496  */
 497 bool
 498 st_translate_vertex_program(struct st_context *st,
 499                             struct st_vertex_program *stvp)
 500 {
 501    struct ureg_program *ureg;
 502    enum pipe_error error;
 503    unsigned num_outputs = 0;
 504    unsigned attr;
 505    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 506    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 507
 508    if (stvp->Base.arb.IsPositionInvariant)
 509       _mesa_insert_mvp_code(st->ctx, &stvp->Base);
 510
 511    st_prepare_vertex_program(stvp);
 512
 513    /* ARB_vp: */
 514    if (!stvp->glsl_to_tgsi) {
 515       _mesa_remove_output_reads(&stvp->Base, PROGRAM_OUTPUT);
 516
 517       /* This determines which states will be updated when the assembly
 518        * shader is bound.
 519        */
 520       stvp->affected_states = ST_NEW_VS_STATE |
 521                               ST_NEW_RASTERIZER |
 522                               ST_NEW_VERTEX_ARRAYS;
 523
 524       if (stvp->Base.Parameters->NumParameters)
 525          stvp->affected_states |= ST_NEW_VS_CONSTANTS;
 526
 527       /* No samplers are allowed in ARB_vp. */
 528    }
 529
 530    /* Get semantic names and indices. */
 531    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 532       if (stvp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 533          unsigned slot = num_outputs++;
 534          unsigned semantic_name, semantic_index;
 535          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 536                                       &semantic_name, &semantic_index);
 537          output_semantic_name[slot] = semantic_name;
 538          output_semantic_index[slot] = semantic_index;
 539       }
 540    }
 541    /* pre-setup potentially unused edgeflag output */
 542    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 543    output_semantic_index[num_outputs] = 0;
 544
 545    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 546    if (ureg == NULL)
 547       return false;
 548
 549    if (stvp->Base.info.clip_distance_array_size)
 550       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 551                     stvp->Base.info.clip_distance_array_size);
 552    if (stvp->Base.info.cull_distance_array_size)
 553       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 554                     stvp->Base.info.cull_distance_array_size);
 555
 556    if (ST_DEBUG & DEBUG_MESA) {
 557       _mesa_print_program(&stvp->Base);
 558       _mesa_print_program_parameters(st->ctx, &stvp->Base);
 559       debug_printf("\n");
 560    }
 561
 562    if (stvp->glsl_to_tgsi) {
 563       error = st_translate_program(st->ctx,
 564                                    PIPE_SHADER_VERTEX,
 565                                    ureg,
 566                                    stvp->glsl_to_tgsi,
 567                                    &stvp->Base,
 568                                    /* inputs */
 569                                    stvp->num_inputs,
 570                                    stvp->input_to_index,
 571                                    NULL, /* inputSlotToAttr */
 572                                    NULL, /* input semantic name */
 573                                    NULL, /* input semantic index */
 574                                    NULL, /* interp mode */
 575                                    /* outputs */
 576                                    num_outputs,
 577                                    stvp->result_to_output,
 578                                    output_semantic_name,
 579                                    output_semantic_index);
 580
 581       st_translate_stream_output_info(&stvp->Base);
 582
 583       free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
 584    } else
 585       error = st_translate_mesa_program(st->ctx,
 586                                         PIPE_SHADER_VERTEX,
 587                                         ureg,
 588                                         &stvp->Base,
 589                                         /* inputs */
 590                                         stvp->num_inputs,
 591                                         stvp->input_to_index,
 592                                         NULL, /* input semantic name */
 593                                         NULL, /* input semantic index */
 594                                         NULL,
 595                                         /* outputs */
 596                                         num_outputs,
 597                                         stvp->result_to_output,
 598                                         output_semantic_name,
 599                                         output_semantic_index);
 600
 601    if (error) {
 602       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 603       _mesa_print_program(&stvp->Base);
 604       debug_assert(0);
 605       return false;
 606    }
 607
 608    stvp->state.tokens = ureg_get_tokens(ureg, NULL);
 609    ureg_destroy(ureg);
 610
 611    if (stvp->glsl_to_tgsi) {
 612       stvp->glsl_to_tgsi = NULL;
 613       st_store_ir_in_disk_cache(st, &stvp->Base, false);
 614    }
 615
 616    /* Translate to NIR.
 617     *
 618     * This must be done after the translation to TGSI is done, because
 619     * we'll pass the NIR shader to the driver and the TGSI version to
 620     * the draw module for the select/feedback/rasterpos code.
 621     */
 622    if (st->pipe->screen->get_shader_param(st->pipe->screen,
 623                                           PIPE_SHADER_VERTEX,
 624                                           PIPE_SHADER_CAP_PREFERRED_IR)) {
 625       assert(!stvp->glsl_to_tgsi);
 626
 627       nir_shader *nir =
 628          st_translate_prog_to_nir(st, &stvp->Base, MESA_SHADER_VERTEX);
 629
 630       if (stvp->state.ir.nir)
 631          ralloc_free(stvp->state.ir.nir);
 632       stvp->state.type = PIPE_SHADER_IR_NIR;
 633       stvp->state.ir.nir = nir;
 634       stvp->Base.nir = nir;
 635       return true;
 636    }
 637
 638    return stvp->state.tokens != NULL;
 639 }
 640
 641 static const gl_state_index16 depth_range_state[STATE_LENGTH] =
 642    { STATE_DEPTH_RANGE };
 643
 644 static struct st_vp_variant *
 645 st_create_vp_variant(struct st_context *st,
 646                      struct st_vertex_program *stvp,
 647                      const struct st_common_variant_key *key)
 648 {
 649    struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
 650    struct pipe_context *pipe = st->pipe;
 651    struct pipe_shader_state state = {0};
 652
 653    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 654       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 655    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 656
 657    vpv->key = *key;
 658    vpv->num_inputs = stvp->num_inputs;
 659
 660    state.stream_output = stvp->state.stream_output;
 661
 662    if (stvp->state.type == PIPE_SHADER_IR_NIR) {
 663       state.type = PIPE_SHADER_IR_NIR;
 664       state.ir.nir = nir_shader_clone(NULL, stvp->state.ir.nir);
 665       if (key->clamp_color)
 666          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 667       if (key->passthrough_edgeflags) {
 668          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 669          vpv->num_inputs++;
 670       }
 671
 672       if (key->lower_point_size) {
 673          _mesa_add_state_reference(params, point_size_state);
 674          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 675                     point_size_state);
 676       }
 677
 678       if (key->lower_ucp) {
 679          struct pipe_screen *screen = pipe->screen;
 680          bool can_compact = screen->get_param(screen,
 681                                               PIPE_CAP_NIR_COMPACT_ARRAYS);
 682
 683          bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 684          gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 685          for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 686             if (use_eye) {
 687                clipplane_state[i][0] = STATE_CLIPPLANE;
 688                clipplane_state[i][1] = i;
 689             } else {
 690                clipplane_state[i][0] = STATE_INTERNAL;
 691                clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 692                clipplane_state[i][2] = i;
 693             }
 694             _mesa_add_state_reference(params, clipplane_state[i]);
 695          }
 696
 697          NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
 698                     true, can_compact, clipplane_state);
 699          NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
 700                     nir_shader_get_entrypoint(state.ir.nir), true, false);
 701       }
 702
 703       st_finalize_nir(st, &stvp->Base, stvp->shader_program,
 704                       state.ir.nir);
 705
 706       vpv->driver_shader = pipe->create_vs_state(pipe, &state);
 707
 708       /* When generating a NIR program, we usually don't have TGSI tokens.
 709        * However, we do create them for ARB_vertex_program / fixed-function VS
 710        * programs which we may need to use with the draw module for legacy
 711        * feedback/select emulation.  If they exist, copy them.
 712        *
 713        * TODO: Lowering for shader variants is not applied to TGSI when
 714        * generating a NIR shader.
 715        */
 716       if (stvp->state.tokens)
 717          vpv->tokens = tgsi_dup_tokens(stvp->state.tokens);
 718
 719       return vpv;
 720    }
 721
 722    state.type = PIPE_SHADER_IR_TGSI;
 723    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 724
 725    /* Emulate features. */
 726    if (key->clamp_color || key->passthrough_edgeflags) {
 727       const struct tgsi_token *tokens;
 728       unsigned flags =
 729          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 730          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 731
 732       tokens = tgsi_emulate(state.tokens, flags);
 733
 734       if (tokens) {
 735          tgsi_free_tokens(state.tokens);
 736          state.tokens = tokens;
 737
 738          if (key->passthrough_edgeflags)
 739             vpv->num_inputs++;
 740       } else
 741          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 742    }
 743
 744    if (key->lower_depth_clamp) {
 745       unsigned depth_range_const =
 746             _mesa_add_state_reference(params, depth_range_state);
 747
 748       const struct tgsi_token *tokens;
 749       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 750                                          key->clip_negative_one_to_one);
 751       if (tokens != state.tokens)
 752          tgsi_free_tokens(state.tokens);
 753       state.tokens = tokens;
 754    }
 755
 756    if (ST_DEBUG & DEBUG_TGSI) {
 757       tgsi_dump(state.tokens, 0);
 758       debug_printf("\n");
 759    }
 760
 761    vpv->driver_shader = pipe->create_vs_state(pipe, &state);
 762    /* Save this for selection/feedback/rasterpos. */
 763    vpv->tokens = state.tokens;
 764    return vpv;
 765 }
 766
 767
 768 /**
 769  * Find/create a vertex program variant.
 770  */
 771 struct st_vp_variant *
 772 st_get_vp_variant(struct st_context *st,
 773                   struct st_vertex_program *stvp,
 774                   const struct st_common_variant_key *key)
 775 {
 776    struct st_vp_variant *vpv;
 777
 778    /* Search for existing variant */
 779    for (vpv = stvp->variants; vpv; vpv = vpv->next) {
 780       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 781          break;
 782       }
 783    }
 784
 785    if (!vpv) {
 786       /* create now */
 787       vpv = st_create_vp_variant(st, stvp, key);
 788       if (vpv) {
 789           for (unsigned index = 0; index < vpv->num_inputs; ++index) {
 790              unsigned attr = stvp->index_to_input[index];
 791              if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 792                 continue;
 793              vpv->vert_attrib_mask |= 1u << attr;
 794           }
 795
 796          /* insert into list */
 797          vpv->next = stvp->variants;
 798          stvp->variants = vpv;
 799       }
 800    }
 801
 802    return vpv;
 803 }
 804
 805
 806 /**
 807  * Translate a Mesa fragment shader into a TGSI shader.
 808  */
 809 bool
 810 st_translate_fragment_program(struct st_context *st,
 811                               struct st_common_program *stfp)
 812 {
 813    /* Non-GLSL programs: */
 814    if (!stfp->glsl_to_tgsi) {
 815       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 816       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 817          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 818
 819       /* This determines which states will be updated when the assembly
 820        * shader is bound.
 821        *
 822        * fragment.position and glDrawPixels always use constants.
 823        */
 824       stfp->affected_states = ST_NEW_FS_STATE |
 825                               ST_NEW_SAMPLE_SHADING |
 826                               ST_NEW_FS_CONSTANTS;
 827
 828       if (stfp->ati_fs) {
 829          /* Just set them for ATI_fs unconditionally. */
 830          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 831                                   ST_NEW_FS_SAMPLERS;
 832       } else {
 833          /* ARB_fp */
 834          if (stfp->Base.SamplersUsed)
 835             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 836                                      ST_NEW_FS_SAMPLERS;
 837       }
 838
 839       /* Translate to NIR. */
 840       if (!stfp->ati_fs &&
 841           st->pipe->screen->get_shader_param(st->pipe->screen,
 842                                              PIPE_SHADER_FRAGMENT,
 843                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 844          nir_shader *nir =
 845             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 846
 847          if (stfp->state.ir.nir)
 848             ralloc_free(stfp->state.ir.nir);
 849          stfp->state.type = PIPE_SHADER_IR_NIR;
 850          stfp->state.ir.nir = nir;
 851          stfp->Base.nir = nir;
 852          return true;
 853       }
 854    }
 855
 856    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 857    ubyte inputMapping[VARYING_SLOT_MAX];
 858    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 859    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 860    GLuint attr;
 861    GLbitfield64 inputsRead;
 862    struct ureg_program *ureg;
 863
 864    GLboolean write_all = GL_FALSE;
 865
 866    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 867    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 868    uint fs_num_inputs = 0;
 869
 870    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 871    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 872    uint fs_num_outputs = 0;
 873
 874    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 875
 876    /*
 877     * Convert Mesa program inputs to TGSI input register semantics.
 878     */
 879    inputsRead = stfp->Base.info.inputs_read;
 880    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 881       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 882          const GLuint slot = fs_num_inputs++;
 883
 884          inputMapping[attr] = slot;
 885          inputSlotToAttr[slot] = attr;
 886
 887          switch (attr) {
 888          case VARYING_SLOT_POS:
 889             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 890             input_semantic_index[slot] = 0;
 891             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 892             break;
 893          case VARYING_SLOT_COL0:
 894             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 895             input_semantic_index[slot] = 0;
 896             interpMode[slot] = stfp->glsl_to_tgsi ?
 897                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 898             break;
 899          case VARYING_SLOT_COL1:
 900             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 901             input_semantic_index[slot] = 1;
 902             interpMode[slot] = stfp->glsl_to_tgsi ?
 903                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 904             break;
 905          case VARYING_SLOT_FOGC:
 906             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 907             input_semantic_index[slot] = 0;
 908             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 909             break;
 910          case VARYING_SLOT_FACE:
 911             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 912             input_semantic_index[slot] = 0;
 913             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 914             break;
 915          case VARYING_SLOT_PRIMITIVE_ID:
 916             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 917             input_semantic_index[slot] = 0;
 918             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 919             break;
 920          case VARYING_SLOT_LAYER:
 921             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 922             input_semantic_index[slot] = 0;
 923             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 924             break;
 925          case VARYING_SLOT_VIEWPORT:
 926             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 927             input_semantic_index[slot] = 0;
 928             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 929             break;
 930          case VARYING_SLOT_CLIP_DIST0:
 931             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 932             input_semantic_index[slot] = 0;
 933             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 934             break;
 935          case VARYING_SLOT_CLIP_DIST1:
 936             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 937             input_semantic_index[slot] = 1;
 938             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 939             break;
 940          case VARYING_SLOT_CULL_DIST0:
 941          case VARYING_SLOT_CULL_DIST1:
 942             /* these should have been lowered by GLSL */
 943             assert(0);
 944             break;
 945             /* In most cases, there is nothing special about these
 946              * inputs, so adopt a convention to use the generic
 947              * semantic name and the mesa VARYING_SLOT_ number as the
 948              * index.
 949              *
 950              * All that is required is that the vertex shader labels
 951              * its own outputs similarly, and that the vertex shader
 952              * generates at least every output required by the
 953              * fragment shader plus fixed-function hardware (such as
 954              * BFC).
 955              *
 956              * However, some drivers may need us to identify the PNTC and TEXi
 957              * varyings if, for example, their capability to replace them with
 958              * sprite coordinates is limited.
 959              */
 960          case VARYING_SLOT_PNTC:
 961             if (st->needs_texcoord_semantic) {
 962                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
 963                input_semantic_index[slot] = 0;
 964                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 965                break;
 966             }
 967             /* fall through */
 968          case VARYING_SLOT_TEX0:
 969          case VARYING_SLOT_TEX1:
 970          case VARYING_SLOT_TEX2:
 971          case VARYING_SLOT_TEX3:
 972          case VARYING_SLOT_TEX4:
 973          case VARYING_SLOT_TEX5:
 974          case VARYING_SLOT_TEX6:
 975          case VARYING_SLOT_TEX7:
 976             if (st->needs_texcoord_semantic) {
 977                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
 978                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
 979                interpMode[slot] = stfp->glsl_to_tgsi ?
 980                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 981                break;
 982             }
 983             /* fall through */
 984          case VARYING_SLOT_VAR0:
 985          default:
 986             /* Semantic indices should be zero-based because drivers may choose
 987              * to assign a fixed slot determined by that index.
 988              * This is useful because ARB_separate_shader_objects uses location
 989              * qualifiers for linkage, and if the semantic index corresponds to
 990              * these locations, linkage passes in the driver become unecessary.
 991              *
 992              * If needs_texcoord_semantic is true, no semantic indices will be
 993              * consumed for the TEXi varyings, and we can base the locations of
 994              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
 995              */
 996             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
 997                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
 998             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
 999             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
1000             if (attr == VARYING_SLOT_PNTC)
1001                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1002             else {
1003                interpMode[slot] = stfp->glsl_to_tgsi ?
1004                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1005             }
1006             break;
1007          }
1008       }
1009       else {
1010          inputMapping[attr] = -1;
1011       }
1012    }
1013
1014    /*
1015     * Semantics and mapping for outputs
1016     */
1017    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
1018
1019    /* if z is written, emit that first */
1020    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
1021       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
1022       fs_output_semantic_index[fs_num_outputs] = 0;
1023       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
1024       fs_num_outputs++;
1025       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
1026    }
1027
1028    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
1029       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
1030       fs_output_semantic_index[fs_num_outputs] = 0;
1031       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
1032       fs_num_outputs++;
1033       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
1034    }
1035
1036    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
1037       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
1038       fs_output_semantic_index[fs_num_outputs] = 0;
1039       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
1040       fs_num_outputs++;
1041       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
1042    }
1043
1044    /* handle remaining outputs (color) */
1045    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
1046       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
1047          stfp->Base.SecondaryOutputsWritten;
1048       const unsigned loc = attr % FRAG_RESULT_MAX;
1049
1050       if (written & BITFIELD64_BIT(loc)) {
1051          switch (loc) {
1052          case FRAG_RESULT_DEPTH:
1053          case FRAG_RESULT_STENCIL:
1054          case FRAG_RESULT_SAMPLE_MASK:
1055             /* handled above */
1056             assert(0);
1057             break;
1058          case FRAG_RESULT_COLOR:
1059             write_all = GL_TRUE; /* fallthrough */
1060          default: {
1061             int index;
1062             assert(loc == FRAG_RESULT_COLOR ||
1063                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1064
1065             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1066
1067             if (attr >= FRAG_RESULT_MAX) {
1068                /* Secondary color for dual source blending. */
1069                assert(index == 0);
1070                index++;
1071             }
1072
1073             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1074             fs_output_semantic_index[fs_num_outputs] = index;
1075             outputMapping[attr] = fs_num_outputs;
1076             break;
1077          }
1078          }
1079
1080          fs_num_outputs++;
1081       }
1082    }
1083
1084    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1085    if (ureg == NULL)
1086       return false;
1087
1088    if (ST_DEBUG & DEBUG_MESA) {
1089       _mesa_print_program(&stfp->Base);
1090       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1091       debug_printf("\n");
1092    }
1093    if (write_all == GL_TRUE)
1094       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1095
1096    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1097       switch (stfp->Base.info.fs.depth_layout) {
1098       case FRAG_DEPTH_LAYOUT_ANY:
1099          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1100                        TGSI_FS_DEPTH_LAYOUT_ANY);
1101          break;
1102       case FRAG_DEPTH_LAYOUT_GREATER:
1103          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1104                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1105          break;
1106       case FRAG_DEPTH_LAYOUT_LESS:
1107          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1108                        TGSI_FS_DEPTH_LAYOUT_LESS);
1109          break;
1110       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1111          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1112                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1113          break;
1114       default:
1115          assert(0);
1116       }
1117    }
1118
1119    if (stfp->glsl_to_tgsi) {
1120       st_translate_program(st->ctx,
1121                            PIPE_SHADER_FRAGMENT,
1122                            ureg,
1123                            stfp->glsl_to_tgsi,
1124                            &stfp->Base,
1125                            /* inputs */
1126                            fs_num_inputs,
1127                            inputMapping,
1128                            inputSlotToAttr,
1129                            input_semantic_name,
1130                            input_semantic_index,
1131                            interpMode,
1132                            /* outputs */
1133                            fs_num_outputs,
1134                            outputMapping,
1135                            fs_output_semantic_name,
1136                            fs_output_semantic_index);
1137
1138       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1139    } else if (stfp->ati_fs)
1140       st_translate_atifs_program(ureg,
1141                                  stfp->ati_fs,
1142                                  &stfp->Base,
1143                                  /* inputs */
1144                                  fs_num_inputs,
1145                                  inputMapping,
1146                                  input_semantic_name,
1147                                  input_semantic_index,
1148                                  interpMode,
1149                                  /* outputs */
1150                                  fs_num_outputs,
1151                                  outputMapping,
1152                                  fs_output_semantic_name,
1153                                  fs_output_semantic_index);
1154    else
1155       st_translate_mesa_program(st->ctx,
1156                                 PIPE_SHADER_FRAGMENT,
1157                                 ureg,
1158                                 &stfp->Base,
1159                                 /* inputs */
1160                                 fs_num_inputs,
1161                                 inputMapping,
1162                                 input_semantic_name,
1163                                 input_semantic_index,
1164                                 interpMode,
1165                                 /* outputs */
1166                                 fs_num_outputs,
1167                                 outputMapping,
1168                                 fs_output_semantic_name,
1169                                 fs_output_semantic_index);
1170
1171    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1172    ureg_destroy(ureg);
1173
1174    if (stfp->glsl_to_tgsi) {
1175       stfp->glsl_to_tgsi = NULL;
1176       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1177    }
1178
1179    return stfp->state.tokens != NULL;
1180 }
1181
1182 static struct st_fp_variant *
1183 st_create_fp_variant(struct st_context *st,
1184                      struct st_common_program *stfp,
1185                      const struct st_fp_variant_key *key)
1186 {
1187    struct pipe_context *pipe = st->pipe;
1188    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1189    struct pipe_shader_state state = {0};
1190    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1191    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1192       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1193    static const gl_state_index16 scale_state[STATE_LENGTH] =
1194       { STATE_INTERNAL, STATE_PT_SCALE };
1195    static const gl_state_index16 bias_state[STATE_LENGTH] =
1196       { STATE_INTERNAL, STATE_PT_BIAS };
1197    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1198       { STATE_INTERNAL, STATE_ALPHA_REF };
1199
1200    if (!variant)
1201       return NULL;
1202
1203    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1204       state.type = PIPE_SHADER_IR_NIR;
1205       state.ir.nir = nir_shader_clone(NULL, stfp->state.ir.nir);
1206
1207       if (key->clamp_color)
1208          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1209
1210       if (key->lower_flatshade)
1211          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1212
1213       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1214          _mesa_add_state_reference(params, alpha_ref_state);
1215          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1216                     false, alpha_ref_state);
1217       }
1218
1219       if (key->lower_two_sided_color)
1220          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
1221
1222       if (key->persample_shading) {
1223           nir_shader *shader = state.ir.nir;
1224           nir_foreach_variable(var, &shader->inputs)
1225              var->data.sample = true;
1226       }
1227
1228       assert(!(key->bitmap && key->drawpixels));
1229
1230       /* glBitmap */
1231       if (key->bitmap) {
1232          nir_lower_bitmap_options options = {0};
1233
1234          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1235          options.sampler = variant->bitmap_sampler;
1236          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1237
1238          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1239       }
1240
1241       /* glDrawPixels (color only) */
1242       if (key->drawpixels) {
1243          nir_lower_drawpixels_options options = {{0}};
1244          unsigned samplers_used = stfp->Base.SamplersUsed;
1245
1246          /* Find the first unused slot. */
1247          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1248          options.drawpix_sampler = variant->drawpix_sampler;
1249          samplers_used |= (1 << variant->drawpix_sampler);
1250
1251          options.pixel_maps = key->pixelMaps;
1252          if (key->pixelMaps) {
1253             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1254             options.pixelmap_sampler = variant->pixelmap_sampler;
1255          }
1256
1257          options.scale_and_bias = key->scaleAndBias;
1258          if (key->scaleAndBias) {
1259             _mesa_add_state_reference(params, scale_state);
1260             memcpy(options.scale_state_tokens, scale_state,
1261                    sizeof(options.scale_state_tokens));
1262             _mesa_add_state_reference(params, bias_state);
1263             memcpy(options.bias_state_tokens, bias_state,
1264                    sizeof(options.bias_state_tokens));
1265          }
1266
1267          _mesa_add_state_reference(params, texcoord_state);
1268          memcpy(options.texcoord_state_tokens, texcoord_state,
1269                 sizeof(options.texcoord_state_tokens));
1270
1271          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1272       }
1273
1274       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1275                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1276                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1277          nir_lower_tex_options options = {0};
1278          options.lower_y_uv_external = key->external.lower_nv12;
1279          options.lower_y_u_v_external = key->external.lower_iyuv;
1280          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1281          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1282          options.lower_ayuv_external = key->external.lower_ayuv;
1283          options.lower_xyuv_external = key->external.lower_xyuv;
1284          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1285       }
1286
1287       st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir);
1288
1289       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1290                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1291          /* This pass needs to happen *after* nir_lower_sampler */
1292          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1293                     ~stfp->Base.SamplersUsed,
1294                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1295                        key->external.lower_yx_xuxv,
1296                     key->external.lower_iyuv);
1297       }
1298
1299       /* Some of the lowering above may have introduced new varyings */
1300       nir_shader_gather_info(state.ir.nir,
1301                              nir_shader_get_entrypoint(state.ir.nir));
1302
1303       variant->driver_shader = pipe->create_fs_state(pipe, &state);
1304       variant->key = *key;
1305
1306       return variant;
1307    }
1308
1309    state.tokens = stfp->state.tokens;
1310
1311    assert(!(key->bitmap && key->drawpixels));
1312
1313    /* Fix texture targets and add fog for ATI_fs */
1314    if (stfp->ati_fs) {
1315       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1316
1317       if (tokens)
1318          state.tokens = tokens;
1319       else
1320          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1321    }
1322
1323    /* Emulate features. */
1324    if (key->clamp_color || key->persample_shading) {
1325       const struct tgsi_token *tokens;
1326       unsigned flags =
1327          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1328          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1329
1330       tokens = tgsi_emulate(state.tokens, flags);
1331
1332       if (tokens) {
1333          if (state.tokens != stfp->state.tokens)
1334             tgsi_free_tokens(state.tokens);
1335          state.tokens = tokens;
1336       } else
1337          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1338    }
1339
1340    /* glBitmap */
1341    if (key->bitmap) {
1342       const struct tgsi_token *tokens;
1343
1344       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1345
1346       tokens = st_get_bitmap_shader(state.tokens,
1347                                     st->internal_target,
1348                                     variant->bitmap_sampler,
1349                                     st->needs_texcoord_semantic,
1350                                     st->bitmap.tex_format ==
1351                                     PIPE_FORMAT_R8_UNORM);
1352
1353       if (tokens) {
1354          if (state.tokens != stfp->state.tokens)
1355             tgsi_free_tokens(state.tokens);
1356          state.tokens = tokens;
1357       } else
1358          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1359    }
1360
1361    /* glDrawPixels (color only) */
1362    if (key->drawpixels) {
1363       const struct tgsi_token *tokens;
1364       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1365
1366       /* Find the first unused slot. */
1367       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1368
1369       if (key->pixelMaps) {
1370          unsigned samplers_used = stfp->Base.SamplersUsed |
1371                                   (1 << variant->drawpix_sampler);
1372
1373          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1374       }
1375
1376       if (key->scaleAndBias) {
1377          scale_const = _mesa_add_state_reference(params, scale_state);
1378          bias_const = _mesa_add_state_reference(params, bias_state);
1379       }
1380
1381       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1382
1383       tokens = st_get_drawpix_shader(state.tokens,
1384                                      st->needs_texcoord_semantic,
1385                                      key->scaleAndBias, scale_const,
1386                                      bias_const, key->pixelMaps,
1387                                      variant->drawpix_sampler,
1388                                      variant->pixelmap_sampler,
1389                                      texcoord_const, st->internal_target);
1390
1391       if (tokens) {
1392          if (state.tokens != stfp->state.tokens)
1393             tgsi_free_tokens(state.tokens);
1394          state.tokens = tokens;
1395       } else
1396          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1397    }
1398
1399    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1400                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1401       const struct tgsi_token *tokens;
1402
1403       /* samplers inserted would conflict, but this should be unpossible: */
1404       assert(!(key->bitmap || key->drawpixels));
1405
1406       tokens = st_tgsi_lower_yuv(state.tokens,
1407                                  ~stfp->Base.SamplersUsed,
1408                                  key->external.lower_nv12 ||
1409                                     key->external.lower_xy_uxvx ||
1410                                     key->external.lower_yx_xuxv,
1411                                  key->external.lower_iyuv);
1412       if (tokens) {
1413          if (state.tokens != stfp->state.tokens)
1414             tgsi_free_tokens(state.tokens);
1415          state.tokens = tokens;
1416       } else {
1417          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1418       }
1419    }
1420
1421    if (key->lower_depth_clamp) {
1422       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1423
1424       const struct tgsi_token *tokens;
1425       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1426       if (state.tokens != stfp->state.tokens)
1427          tgsi_free_tokens(state.tokens);
1428       state.tokens = tokens;
1429    }
1430
1431    if (ST_DEBUG & DEBUG_TGSI) {
1432       tgsi_dump(state.tokens, 0);
1433       debug_printf("\n");
1434    }
1435
1436    /* fill in variant */
1437    variant->driver_shader = pipe->create_fs_state(pipe, &state);
1438    variant->key = *key;
1439
1440    if (state.tokens != stfp->state.tokens)
1441       tgsi_free_tokens(state.tokens);
1442    return variant;
1443 }
1444
1445 /**
1446  * Translate fragment program if needed.
1447  */
1448 struct st_fp_variant *
1449 st_get_fp_variant(struct st_context *st,
1450                   struct st_common_program *stfp,
1451                   const struct st_fp_variant_key *key)
1452 {
1453    struct st_fp_variant *fpv;
1454
1455    /* Search for existing variant */
1456    for (fpv = stfp->fp_variants; fpv; fpv = fpv->next) {
1457       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1458          break;
1459       }
1460    }
1461
1462    if (!fpv) {
1463       /* create new */
1464       fpv = st_create_fp_variant(st, stfp, key);
1465       if (fpv) {
1466          if (key->bitmap || key->drawpixels) {
1467             /* Regular variants should always come before the
1468              * bitmap & drawpixels variants, (unless there
1469              * are no regular variants) so that
1470              * st_update_fp can take a fast path when
1471              * shader_has_one_variant is set.
1472              */
1473             if (!stfp->fp_variants) {
1474                stfp->fp_variants = fpv;
1475             } else {
1476                /* insert into list after the first one */
1477                fpv->next = stfp->fp_variants->next;
1478                stfp->fp_variants->next = fpv;
1479             }
1480          } else {
1481             /* insert into list */
1482             fpv->next = stfp->fp_variants;
1483             stfp->fp_variants = fpv;
1484          }
1485       }
1486    }
1487
1488    return fpv;
1489 }
1490
1491 /**
1492  * Translate a program. This is common code for geometry and tessellation
1493  * shaders.
1494  */
1495 bool
1496 st_translate_common_program(struct st_context *st,
1497                             struct st_common_program *stcp)
1498 {
1499    struct gl_program *prog = &stcp->Base;
1500    enum pipe_shader_type stage =
1501       pipe_shader_type_from_mesa(stcp->Base.info.stage);
1502    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1503
1504    if (ureg == NULL)
1505       return false;
1506
1507    switch (stage) {
1508    case PIPE_SHADER_TESS_CTRL:
1509       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1510                     stcp->Base.info.tess.tcs_vertices_out);
1511       break;
1512
1513    case PIPE_SHADER_TESS_EVAL:
1514       if (stcp->Base.info.tess.primitive_mode == GL_ISOLINES)
1515          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1516       else
1517          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1518                        stcp->Base.info.tess.primitive_mode);
1519
1520       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1521       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1522                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1523       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1524                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1525
1526       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1527                     (stcp->Base.info.tess.spacing + 1) % 3);
1528
1529       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1530                     !stcp->Base.info.tess.ccw);
1531       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1532                     stcp->Base.info.tess.point_mode);
1533       break;
1534
1535    case PIPE_SHADER_GEOMETRY:
1536       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1537                     stcp->Base.info.gs.input_primitive);
1538       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1539                     stcp->Base.info.gs.output_primitive);
1540       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1541                     stcp->Base.info.gs.vertices_out);
1542       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1543                     stcp->Base.info.gs.invocations);
1544       break;
1545
1546    default:
1547       break;
1548    }
1549
1550    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1551    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1552    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1553    GLuint attr;
1554
1555    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1556    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1557    uint num_inputs = 0;
1558
1559    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1560    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1561    uint num_outputs = 0;
1562
1563    GLint i;
1564
1565    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1566    memset(inputMapping, 0, sizeof(inputMapping));
1567    memset(outputMapping, 0, sizeof(outputMapping));
1568    memset(&stcp->state, 0, sizeof(stcp->state));
1569
1570    if (prog->info.clip_distance_array_size)
1571       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1572                     prog->info.clip_distance_array_size);
1573    if (prog->info.cull_distance_array_size)
1574       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1575                     prog->info.cull_distance_array_size);
1576
1577    /*
1578     * Convert Mesa program inputs to TGSI input register semantics.
1579     */
1580    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1581       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1582          continue;
1583
1584       unsigned slot = num_inputs++;
1585
1586       inputMapping[attr] = slot;
1587       inputSlotToAttr[slot] = attr;
1588
1589       unsigned semantic_name, semantic_index;
1590       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1591                                    &semantic_name, &semantic_index);
1592       input_semantic_name[slot] = semantic_name;
1593       input_semantic_index[slot] = semantic_index;
1594    }
1595
1596    /* Also add patch inputs. */
1597    for (attr = 0; attr < 32; attr++) {
1598       if (prog->info.patch_inputs_read & (1u << attr)) {
1599          GLuint slot = num_inputs++;
1600          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1601
1602          inputMapping[patch_attr] = slot;
1603          inputSlotToAttr[slot] = patch_attr;
1604          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1605          input_semantic_index[slot] = attr;
1606       }
1607    }
1608
1609    /* initialize output semantics to defaults */
1610    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1611       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1612       output_semantic_index[i] = 0;
1613    }
1614
1615    /*
1616     * Determine number of outputs, the (default) output register
1617     * mapping and the semantic information for each output.
1618     */
1619    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1620       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1621          GLuint slot = num_outputs++;
1622
1623          outputMapping[attr] = slot;
1624
1625          unsigned semantic_name, semantic_index;
1626          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1627                                       &semantic_name, &semantic_index);
1628          output_semantic_name[slot] = semantic_name;
1629          output_semantic_index[slot] = semantic_index;
1630       }
1631    }
1632
1633    /* Also add patch outputs. */
1634    for (attr = 0; attr < 32; attr++) {
1635       if (prog->info.patch_outputs_written & (1u << attr)) {
1636          GLuint slot = num_outputs++;
1637          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1638
1639          outputMapping[patch_attr] = slot;
1640          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1641          output_semantic_index[slot] = attr;
1642       }
1643    }
1644
1645    st_translate_program(st->ctx,
1646                         stage,
1647                         ureg,
1648                         stcp->glsl_to_tgsi,
1649                         prog,
1650                         /* inputs */
1651                         num_inputs,
1652                         inputMapping,
1653                         inputSlotToAttr,
1654                         input_semantic_name,
1655                         input_semantic_index,
1656                         NULL,
1657                         /* outputs */
1658                         num_outputs,
1659                         outputMapping,
1660                         output_semantic_name,
1661                         output_semantic_index);
1662
1663    stcp->state.tokens = ureg_get_tokens(ureg, NULL);
1664
1665    ureg_destroy(ureg);
1666
1667    st_translate_stream_output_info(prog);
1668
1669    st_store_ir_in_disk_cache(st, prog, false);
1670
1671    if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) {
1672       _mesa_print_program(prog);
1673       debug_printf("\n");
1674    }
1675
1676    if (ST_DEBUG & DEBUG_TGSI) {
1677       tgsi_dump(stcp->state.tokens, 0);
1678       debug_printf("\n");
1679    }
1680
1681    free_glsl_to_tgsi_visitor(stcp->glsl_to_tgsi);
1682    stcp->glsl_to_tgsi = NULL;
1683    return true;
1684 }
1685
1686
1687 /**
1688  * Get/create a basic program variant.
1689  */
1690 struct st_common_variant *
1691 st_get_common_variant(struct st_context *st,
1692                       struct st_common_program *prog,
1693                       const struct st_common_variant_key *key)
1694 {
1695    struct pipe_context *pipe = st->pipe;
1696    struct st_common_variant *v;
1697    struct pipe_shader_state state = {0};
1698
1699    /* Search for existing variant */
1700    for (v = prog->variants; v; v = v->next) {
1701       if (memcmp(&v->key, key, sizeof(*key)) == 0) {
1702          break;
1703       }
1704    }
1705
1706    if (!v) {
1707       /* create new */
1708       v = CALLOC_STRUCT(st_common_variant);
1709       if (v) {
1710
1711          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1712             state.type = PIPE_SHADER_IR_NIR;
1713             state.ir.nir = nir_shader_clone(NULL, prog->state.ir.nir);
1714
1715             if (key->clamp_color)
1716                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1717
1718             state.stream_output = prog->state.stream_output;
1719
1720             st_finalize_nir(st, &prog->Base, prog->shader_program,
1721                             state.ir.nir);
1722          } else {
1723             if (key->lower_depth_clamp) {
1724                struct gl_program_parameter_list *params = prog->Base.Parameters;
1725
1726                unsigned depth_range_const =
1727                      _mesa_add_state_reference(params, depth_range_state);
1728
1729                const struct tgsi_token *tokens;
1730                tokens =
1731                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1732                                                depth_range_const,
1733                                                key->clip_negative_one_to_one);
1734
1735                if (tokens != prog->state.tokens)
1736                   tgsi_free_tokens(prog->state.tokens);
1737
1738                prog->state.tokens = tokens;
1739             }
1740             state = prog->state;
1741          }
1742          /* fill in new variant */
1743          switch (prog->Base.info.stage) {
1744          case MESA_SHADER_TESS_CTRL:
1745             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1746             break;
1747          case MESA_SHADER_TESS_EVAL:
1748             v->driver_shader = pipe->create_tes_state(pipe, &state);
1749             break;
1750          case MESA_SHADER_GEOMETRY:
1751             v->driver_shader = pipe->create_gs_state(pipe, &state);
1752             break;
1753          case MESA_SHADER_COMPUTE: {
1754             struct pipe_compute_state cs = {0};
1755             cs.ir_type = state.type;
1756             cs.req_local_mem = prog->Base.info.cs.shared_size;
1757
1758             if (state.type == PIPE_SHADER_IR_NIR)
1759                cs.prog = state.ir.nir;
1760             else
1761                cs.prog = state.tokens;
1762
1763             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1764             break;
1765          }
1766          default:
1767             assert(!"unhandled shader type");
1768             free(v);
1769             return NULL;
1770          }
1771
1772          v->key = *key;
1773
1774          /* insert into list */
1775          v->next = prog->variants;
1776          prog->variants = v;
1777       }
1778    }
1779
1780    return v;
1781 }
1782
1783
1784 /**
1785  * Vert/Geom/Frag programs have per-context variants.  Free all the
1786  * variants attached to the given program which match the given context.
1787  */
1788 static void
1789 destroy_program_variants(struct st_context *st, struct gl_program *target)
1790 {
1791    if (!target || target == &_mesa_DummyProgram)
1792       return;
1793
1794    switch (target->Target) {
1795    case GL_VERTEX_PROGRAM_ARB:
1796       {
1797          struct st_vertex_program *stvp = (struct st_vertex_program *) target;
1798          struct st_vp_variant *vpv, **prevPtr = &stvp->variants;
1799
1800          for (vpv = stvp->variants; vpv; ) {
1801             struct st_vp_variant *next = vpv->next;
1802             if (vpv->key.st == st) {
1803                /* unlink from list */
1804                *prevPtr = next;
1805                /* destroy this variant */
1806                delete_vp_variant(st, vpv);
1807             }
1808             else {
1809                prevPtr = &vpv->next;
1810             }
1811             vpv = next;
1812          }
1813       }
1814       break;
1815    case GL_FRAGMENT_PROGRAM_ARB:
1816       {
1817          struct st_common_program *stfp =
1818             (struct st_common_program *) target;
1819          struct st_fp_variant *fpv, **prevPtr = &stfp->fp_variants;
1820
1821          for (fpv = stfp->fp_variants; fpv; ) {
1822             struct st_fp_variant *next = fpv->next;
1823             if (fpv->key.st == st) {
1824                /* unlink from list */
1825                *prevPtr = next;
1826                /* destroy this variant */
1827                delete_fp_variant(st, fpv);
1828             }
1829             else {
1830                prevPtr = &fpv->next;
1831             }
1832             fpv = next;
1833          }
1834       }
1835       break;
1836    case GL_GEOMETRY_PROGRAM_NV:
1837    case GL_TESS_CONTROL_PROGRAM_NV:
1838    case GL_TESS_EVALUATION_PROGRAM_NV:
1839    case GL_COMPUTE_PROGRAM_NV:
1840       {
1841          struct st_common_program *p = st_common_program(target);
1842          struct st_common_variant *v, **prevPtr = &p->variants;
1843
1844          for (v = p->variants; v; ) {
1845             struct st_common_variant *next = v->next;
1846             if (v->key.st == st) {
1847                /* unlink from list */
1848                *prevPtr = next;
1849                /* destroy this variant */
1850                delete_basic_variant(st, v, target->Target);
1851             }
1852             else {
1853                prevPtr = &v->next;
1854             }
1855             v = next;
1856          }
1857       }
1858       break;
1859    default:
1860       _mesa_problem(NULL, "Unexpected program target 0x%x in "
1861                     "destroy_program_variants_cb()", target->Target);
1862    }
1863 }
1864
1865
1866 /**
1867  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1868  * which match the given context.
1869  */
1870 static void
1871 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1872 {
1873    struct st_context *st = (struct st_context *) userData;
1874    struct gl_shader *shader = (struct gl_shader *) data;
1875
1876    switch (shader->Type) {
1877    case GL_SHADER_PROGRAM_MESA:
1878       {
1879          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1880          GLuint i;
1881
1882          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1883             if (shProg->_LinkedShaders[i])
1884                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1885          }
1886       }
1887       break;
1888    case GL_VERTEX_SHADER:
1889    case GL_FRAGMENT_SHADER:
1890    case GL_GEOMETRY_SHADER:
1891    case GL_TESS_CONTROL_SHADER:
1892    case GL_TESS_EVALUATION_SHADER:
1893    case GL_COMPUTE_SHADER:
1894       break;
1895    default:
1896       assert(0);
1897    }
1898 }
1899
1900
1901 /**
1902  * Callback for _mesa_HashWalk.  Free all the program variants which match
1903  * the given context.
1904  */
1905 static void
1906 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1907 {
1908    struct st_context *st = (struct st_context *) userData;
1909    struct gl_program *program = (struct gl_program *) data;
1910    destroy_program_variants(st, program);
1911 }
1912
1913
1914 /**
1915  * Walk over all shaders and programs to delete any variants which
1916  * belong to the given context.
1917  * This is called during context tear-down.
1918  */
1919 void
1920 st_destroy_program_variants(struct st_context *st)
1921 {
1922    /* If shaders can be shared with other contexts, the last context will
1923     * call DeleteProgram on all shaders, releasing everything.
1924     */
1925    if (st->has_shareable_shaders)
1926       return;
1927
1928    /* ARB vert/frag program */
1929    _mesa_HashWalk(st->ctx->Shared->Programs,
1930                   destroy_program_variants_cb, st);
1931
1932    /* GLSL vert/frag/geom shaders */
1933    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1934                   destroy_shader_program_variants_cb, st);
1935 }
1936
1937
1938 /**
1939  * For debugging, print/dump the current vertex program.
1940  */
1941 void
1942 st_print_current_vertex_program(void)
1943 {
1944    GET_CURRENT_CONTEXT(ctx);
1945
1946    if (ctx->VertexProgram._Current) {
1947       struct st_vertex_program *stvp =
1948          (struct st_vertex_program *) ctx->VertexProgram._Current;
1949       struct st_vp_variant *stv;
1950
1951       debug_printf("Vertex program %u\n", stvp->Base.Id);
1952
1953       for (stv = stvp->variants; stv; stv = stv->next) {
1954          debug_printf("variant %p\n", stv);
1955          tgsi_dump(stv->tokens, 0);
1956       }
1957    }
1958 }
1959
1960
1961 /**
1962  * Compile one shader variant.
1963  */
1964 void
1965 st_precompile_shader_variant(struct st_context *st,
1966                              struct gl_program *prog)
1967 {
1968    switch (prog->Target) {
1969    case GL_VERTEX_PROGRAM_ARB: {
1970       struct st_vertex_program *p = (struct st_vertex_program *)prog;
1971       struct st_common_variant_key key;
1972
1973       memset(&key, 0, sizeof(key));
1974
1975       key.st = st->has_shareable_shaders ? NULL : st;
1976       st_get_vp_variant(st, p, &key);
1977       break;
1978    }
1979
1980    case GL_FRAGMENT_PROGRAM_ARB: {
1981       struct st_common_program *p = (struct st_common_program *)prog;
1982       struct st_fp_variant_key key;
1983
1984       memset(&key, 0, sizeof(key));
1985
1986       key.st = st->has_shareable_shaders ? NULL : st;
1987       st_get_fp_variant(st, p, &key);
1988       break;
1989    }
1990
1991    case GL_TESS_CONTROL_PROGRAM_NV:
1992    case GL_TESS_EVALUATION_PROGRAM_NV:
1993    case GL_GEOMETRY_PROGRAM_NV:
1994    case GL_COMPUTE_PROGRAM_NV: {
1995       struct st_common_program *p = st_common_program(prog);
1996       struct st_common_variant_key key;
1997
1998       memset(&key, 0, sizeof(key));
1999
2000       key.st = st->has_shareable_shaders ? NULL : st;
2001       st_get_common_variant(st, p, &key);
2002       break;
2003    }
2004
2005    default:
2006       assert(0);
2007    }
2008 }