src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35 #include "main/imports.h"
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44 #include "draw/draw_context.h"
  45
  46 #include "pipe/p_context.h"
  47 #include "pipe/p_defines.h"
  48 #include "pipe/p_shader_tokens.h"
  49 #include "draw/draw_context.h"
  50 #include "tgsi/tgsi_dump.h"
  51 #include "tgsi/tgsi_emulate.h"
  52 #include "tgsi/tgsi_parse.h"
  53 #include "tgsi/tgsi_ureg.h"
  54
  55 #include "st_debug.h"
  56 #include "st_cb_bitmap.h"
  57 #include "st_cb_drawpixels.h"
  58 #include "st_context.h"
  59 #include "st_tgsi_lower_depth_clamp.h"
  60 #include "st_tgsi_lower_yuv.h"
  61 #include "st_program.h"
  62 #include "st_mesa_to_tgsi.h"
  63 #include "st_atifs_to_tgsi.h"
  64 #include "st_nir.h"
  65 #include "st_shader_cache.h"
  66 #include "st_util.h"
  67 #include "cso_cache/cso_context.h"
  68
  69
  70
  71 static void
  72 set_affected_state_flags(uint64_t *states,
  73                          struct gl_program *prog,
  74                          uint64_t new_constants,
  75                          uint64_t new_sampler_views,
  76                          uint64_t new_samplers,
  77                          uint64_t new_images,
  78                          uint64_t new_ubos,
  79                          uint64_t new_ssbos,
  80                          uint64_t new_atomics)
  81 {
  82    if (prog->Parameters->NumParameters)
  83       *states |= new_constants;
  84
  85    if (prog->info.num_textures)
  86       *states |= new_sampler_views | new_samplers;
  87
  88    if (prog->info.num_images)
  89       *states |= new_images;
  90
  91    if (prog->info.num_ubos)
  92       *states |= new_ubos;
  93
  94    if (prog->info.num_ssbos)
  95       *states |= new_ssbos;
  96
  97    if (prog->info.num_abos)
  98       *states |= new_atomics;
  99 }
 100
 101 /**
 102  * This determines which states will be updated when the shader is bound.
 103  */
 104 void
 105 st_set_prog_affected_state_flags(struct gl_program *prog)
 106 {
 107    uint64_t *states;
 108
 109    switch (prog->info.stage) {
 110    case MESA_SHADER_VERTEX:
 111       states = &((struct st_program*)prog)->affected_states;
 112
 113       *states = ST_NEW_VS_STATE |
 114                 ST_NEW_RASTERIZER |
 115                 ST_NEW_VERTEX_ARRAYS;
 116
 117       set_affected_state_flags(states, prog,
 118                                ST_NEW_VS_CONSTANTS,
 119                                ST_NEW_VS_SAMPLER_VIEWS,
 120                                ST_NEW_VS_SAMPLERS,
 121                                ST_NEW_VS_IMAGES,
 122                                ST_NEW_VS_UBOS,
 123                                ST_NEW_VS_SSBOS,
 124                                ST_NEW_VS_ATOMICS);
 125       break;
 126
 127    case MESA_SHADER_TESS_CTRL:
 128       states = &(st_program(prog))->affected_states;
 129
 130       *states = ST_NEW_TCS_STATE;
 131
 132       set_affected_state_flags(states, prog,
 133                                ST_NEW_TCS_CONSTANTS,
 134                                ST_NEW_TCS_SAMPLER_VIEWS,
 135                                ST_NEW_TCS_SAMPLERS,
 136                                ST_NEW_TCS_IMAGES,
 137                                ST_NEW_TCS_UBOS,
 138                                ST_NEW_TCS_SSBOS,
 139                                ST_NEW_TCS_ATOMICS);
 140       break;
 141
 142    case MESA_SHADER_TESS_EVAL:
 143       states = &(st_program(prog))->affected_states;
 144
 145       *states = ST_NEW_TES_STATE |
 146                 ST_NEW_RASTERIZER;
 147
 148       set_affected_state_flags(states, prog,
 149                                ST_NEW_TES_CONSTANTS,
 150                                ST_NEW_TES_SAMPLER_VIEWS,
 151                                ST_NEW_TES_SAMPLERS,
 152                                ST_NEW_TES_IMAGES,
 153                                ST_NEW_TES_UBOS,
 154                                ST_NEW_TES_SSBOS,
 155                                ST_NEW_TES_ATOMICS);
 156       break;
 157
 158    case MESA_SHADER_GEOMETRY:
 159       states = &(st_program(prog))->affected_states;
 160
 161       *states = ST_NEW_GS_STATE |
 162                 ST_NEW_RASTERIZER;
 163
 164       set_affected_state_flags(states, prog,
 165                                ST_NEW_GS_CONSTANTS,
 166                                ST_NEW_GS_SAMPLER_VIEWS,
 167                                ST_NEW_GS_SAMPLERS,
 168                                ST_NEW_GS_IMAGES,
 169                                ST_NEW_GS_UBOS,
 170                                ST_NEW_GS_SSBOS,
 171                                ST_NEW_GS_ATOMICS);
 172       break;
 173
 174    case MESA_SHADER_FRAGMENT:
 175       states = &((struct st_program*)prog)->affected_states;
 176
 177       /* gl_FragCoord and glDrawPixels always use constants. */
 178       *states = ST_NEW_FS_STATE |
 179                 ST_NEW_SAMPLE_SHADING |
 180                 ST_NEW_FS_CONSTANTS;
 181
 182       set_affected_state_flags(states, prog,
 183                                ST_NEW_FS_CONSTANTS,
 184                                ST_NEW_FS_SAMPLER_VIEWS,
 185                                ST_NEW_FS_SAMPLERS,
 186                                ST_NEW_FS_IMAGES,
 187                                ST_NEW_FS_UBOS,
 188                                ST_NEW_FS_SSBOS,
 189                                ST_NEW_FS_ATOMICS);
 190       break;
 191
 192    case MESA_SHADER_COMPUTE:
 193       states = &((struct st_program*)prog)->affected_states;
 194
 195       *states = ST_NEW_CS_STATE;
 196
 197       set_affected_state_flags(states, prog,
 198                                ST_NEW_CS_CONSTANTS,
 199                                ST_NEW_CS_SAMPLER_VIEWS,
 200                                ST_NEW_CS_SAMPLERS,
 201                                ST_NEW_CS_IMAGES,
 202                                ST_NEW_CS_UBOS,
 203                                ST_NEW_CS_SSBOS,
 204                                ST_NEW_CS_ATOMICS);
 205       break;
 206
 207    default:
 208       unreachable("unhandled shader stage");
 209    }
 210 }
 211
 212
 213 /**
 214  * Delete a shader variant.  Note the caller must unlink the variant from
 215  * the linked list.
 216  */
 217 static void
 218 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
 219 {
 220    if (v->driver_shader) {
 221       if (target == GL_VERTEX_PROGRAM_ARB &&
 222           ((struct st_common_variant*)v)->key.is_draw_shader) {
 223          /* Draw shader. */
 224          draw_delete_vertex_shader(st->draw, v->driver_shader);
 225       } else if (st->has_shareable_shaders || v->st == st) {
 226          /* The shader's context matches the calling context, or we
 227           * don't care.
 228           */
 229          switch (target) {
 230          case GL_VERTEX_PROGRAM_ARB:
 231             st->pipe->delete_vs_state(st->pipe, v->driver_shader);
 232             break;
 233          case GL_TESS_CONTROL_PROGRAM_NV:
 234             st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
 235             break;
 236          case GL_TESS_EVALUATION_PROGRAM_NV:
 237             st->pipe->delete_tes_state(st->pipe, v->driver_shader);
 238             break;
 239          case GL_GEOMETRY_PROGRAM_NV:
 240             st->pipe->delete_gs_state(st->pipe, v->driver_shader);
 241             break;
 242          case GL_FRAGMENT_PROGRAM_ARB:
 243             st->pipe->delete_fs_state(st->pipe, v->driver_shader);
 244             break;
 245          case GL_COMPUTE_PROGRAM_NV:
 246             st->pipe->delete_compute_state(st->pipe, v->driver_shader);
 247             break;
 248          default:
 249             unreachable("bad shader type in delete_basic_variant");
 250          }
 251       } else {
 252          /* We can't delete a shader with a context different from the one
 253           * that created it.  Add it to the creating context's zombie list.
 254           */
 255          enum pipe_shader_type type =
 256             pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
 257
 258          st_save_zombie_shader(v->st, type, v->driver_shader);
 259       }
 260    }
 261
 262    free(v);
 263 }
 264
 265 static void
 266 st_unbind_program(struct st_context *st, struct st_program *p)
 267 {
 268    /* Unbind the shader in cso_context and re-bind in st/mesa. */
 269    switch (p->Base.info.stage) {
 270    case MESA_SHADER_VERTEX:
 271       cso_set_vertex_shader_handle(st->cso_context, NULL);
 272       st->dirty |= ST_NEW_VS_STATE;
 273       break;
 274    case MESA_SHADER_TESS_CTRL:
 275       cso_set_tessctrl_shader_handle(st->cso_context, NULL);
 276       st->dirty |= ST_NEW_TCS_STATE;
 277       break;
 278    case MESA_SHADER_TESS_EVAL:
 279       cso_set_tesseval_shader_handle(st->cso_context, NULL);
 280       st->dirty |= ST_NEW_TES_STATE;
 281       break;
 282    case MESA_SHADER_GEOMETRY:
 283       cso_set_geometry_shader_handle(st->cso_context, NULL);
 284       st->dirty |= ST_NEW_GS_STATE;
 285       break;
 286    case MESA_SHADER_FRAGMENT:
 287       cso_set_fragment_shader_handle(st->cso_context, NULL);
 288       st->dirty |= ST_NEW_FS_STATE;
 289       break;
 290    case MESA_SHADER_COMPUTE:
 291       cso_set_compute_shader_handle(st->cso_context, NULL);
 292       st->dirty |= ST_NEW_CS_STATE;
 293       break;
 294    default:
 295       unreachable("invalid shader type");
 296    }
 297 }
 298
 299 /**
 300  * Free all basic program variants.
 301  */
 302 void
 303 st_release_variants(struct st_context *st, struct st_program *p)
 304 {
 305    struct st_variant *v;
 306
 307    /* If we are releasing shaders, re-bind them, because we don't
 308     * know which shaders are bound in the driver.
 309     */
 310    if (p->variants)
 311       st_unbind_program(st, p);
 312
 313    for (v = p->variants; v; ) {
 314       struct st_variant *next = v->next;
 315       delete_variant(st, v, p->Base.Target);
 316       v = next;
 317    }
 318
 319    p->variants = NULL;
 320
 321    if (p->state.tokens) {
 322       ureg_free_tokens(p->state.tokens);
 323       p->state.tokens = NULL;
 324    }
 325
 326    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 327     * it has resulted in the driver taking ownership of the NIR.  Those
 328     * callers should be NULLing out the nir field in any pipe_shader_state
 329     * that might have this called in order to indicate that.
 330     *
 331     * GLSL IR and ARB programs will have set gl_program->nir to the same
 332     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 333     */
 334 }
 335
 336 void
 337 st_finalize_nir_before_variants(struct nir_shader *nir)
 338 {
 339    NIR_PASS_V(nir, nir_opt_access);
 340
 341    NIR_PASS_V(nir, nir_split_var_copies);
 342    NIR_PASS_V(nir, nir_lower_var_copies);
 343    if (nir->options->lower_all_io_to_temps ||
 344        nir->options->lower_all_io_to_elements ||
 345        nir->info.stage == MESA_SHADER_VERTEX ||
 346        nir->info.stage == MESA_SHADER_GEOMETRY) {
 347       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 348    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 349       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 350    }
 351
 352    st_nir_assign_vs_in_locations(nir);
 353 }
 354
 355 /**
 356  * Translate ARB (asm) program to NIR
 357  */
 358 static nir_shader *
 359 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 360                          gl_shader_stage stage)
 361 {
 362    struct pipe_screen *screen = st->pipe->screen;
 363    const struct gl_shader_compiler_options *options =
 364       &st->ctx->Const.ShaderCompilerOptions[stage];
 365
 366    /* Translate to NIR */
 367    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 368    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 369    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 370
 371    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
 372    NIR_PASS_V(nir, nir_lower_system_values);
 373
 374    /* Optimise NIR */
 375    NIR_PASS_V(nir, nir_opt_constant_folding);
 376    st_nir_opts(nir);
 377    st_finalize_nir_before_variants(nir);
 378
 379    if (st->allow_st_finalize_nir_twice)
 380       st_finalize_nir(st, prog, NULL, nir, true);
 381
 382    nir_validate_shader(nir, "after st/glsl finalize_nir");
 383
 384    return nir;
 385 }
 386
 387 void
 388 st_prepare_vertex_program(struct st_program *stp)
 389 {
 390    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 391
 392    stvp->num_inputs = 0;
 393    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 394    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 395
 396    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 397     * and TGSI generic input indexes, plus input attrib semantic info.
 398     */
 399    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 400       if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 401          stvp->input_to_index[attr] = stvp->num_inputs;
 402          stvp->index_to_input[stvp->num_inputs] = attr;
 403          stvp->num_inputs++;
 404
 405          if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 406             /* add placeholder for second part of a double attribute */
 407             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 408             stvp->num_inputs++;
 409          }
 410       }
 411    }
 412    /* pre-setup potentially unused edgeflag input */
 413    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 414    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 415
 416    /* Compute mapping of vertex program outputs to slots. */
 417    unsigned num_outputs = 0;
 418    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 419       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 420          stvp->result_to_output[attr] = num_outputs++;
 421    }
 422    /* pre-setup potentially unused edgeflag output */
 423    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 424 }
 425
 426 void
 427 st_translate_stream_output_info(struct gl_program *prog)
 428 {
 429    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 430    if (!info)
 431       return;
 432
 433    /* Determine the (default) output register mapping for each output. */
 434    unsigned num_outputs = 0;
 435    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 436    memset(output_mapping, 0, sizeof(output_mapping));
 437
 438    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 439       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 440          output_mapping[attr] = num_outputs++;
 441    }
 442
 443    /* Translate stream output info. */
 444    struct pipe_stream_output_info *so_info =
 445       &((struct st_program*)prog)->state.stream_output;
 446
 447    for (unsigned i = 0; i < info->NumOutputs; i++) {
 448       so_info->output[i].register_index =
 449          output_mapping[info->Outputs[i].OutputRegister];
 450       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 451       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 452       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 453       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 454       so_info->output[i].stream = info->Outputs[i].StreamId;
 455    }
 456
 457    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 458       so_info->stride[i] = info->Buffers[i].Stride;
 459    }
 460    so_info->num_outputs = info->NumOutputs;
 461 }
 462
 463 /**
 464  * Translate a vertex program.
 465  */
 466 bool
 467 st_translate_vertex_program(struct st_context *st,
 468                             struct st_program *stp)
 469 {
 470    struct ureg_program *ureg;
 471    enum pipe_error error;
 472    unsigned num_outputs = 0;
 473    unsigned attr;
 474    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 475    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 476
 477    if (stp->Base.arb.IsPositionInvariant)
 478       _mesa_insert_mvp_code(st->ctx, &stp->Base);
 479
 480    st_prepare_vertex_program(stp);
 481
 482    /* ARB_vp: */
 483    if (!stp->glsl_to_tgsi) {
 484       _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);
 485
 486       /* This determines which states will be updated when the assembly
 487        * shader is bound.
 488        */
 489       stp->affected_states = ST_NEW_VS_STATE |
 490                               ST_NEW_RASTERIZER |
 491                               ST_NEW_VERTEX_ARRAYS;
 492
 493       if (stp->Base.Parameters->NumParameters)
 494          stp->affected_states |= ST_NEW_VS_CONSTANTS;
 495
 496       /* Translate to NIR if preferred. */
 497       if (st->pipe->screen->get_shader_param(st->pipe->screen,
 498                                              PIPE_SHADER_VERTEX,
 499                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 500          assert(!stp->glsl_to_tgsi);
 501
 502          if (stp->Base.nir)
 503             ralloc_free(stp->Base.nir);
 504
 505          stp->state.type = PIPE_SHADER_IR_NIR;
 506          stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
 507                                                   MESA_SHADER_VERTEX);
 508          /* For st_draw_feedback, we need to generate TGSI too if draw doesn't
 509           * use LLVM.
 510           */
 511          if (draw_has_llvm())
 512             return true;
 513       }
 514    }
 515
 516    /* Get semantic names and indices. */
 517    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 518       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 519          unsigned slot = num_outputs++;
 520          unsigned semantic_name, semantic_index;
 521          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 522                                       &semantic_name, &semantic_index);
 523          output_semantic_name[slot] = semantic_name;
 524          output_semantic_index[slot] = semantic_index;
 525       }
 526    }
 527    /* pre-setup potentially unused edgeflag output */
 528    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 529    output_semantic_index[num_outputs] = 0;
 530
 531    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 532    if (ureg == NULL)
 533       return false;
 534
 535    if (stp->Base.info.clip_distance_array_size)
 536       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 537                     stp->Base.info.clip_distance_array_size);
 538    if (stp->Base.info.cull_distance_array_size)
 539       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 540                     stp->Base.info.cull_distance_array_size);
 541
 542    if (ST_DEBUG & DEBUG_MESA) {
 543       _mesa_print_program(&stp->Base);
 544       _mesa_print_program_parameters(st->ctx, &stp->Base);
 545       debug_printf("\n");
 546    }
 547
 548    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 549
 550    if (stp->glsl_to_tgsi) {
 551       error = st_translate_program(st->ctx,
 552                                    PIPE_SHADER_VERTEX,
 553                                    ureg,
 554                                    stp->glsl_to_tgsi,
 555                                    &stp->Base,
 556                                    /* inputs */
 557                                    stvp->num_inputs,
 558                                    stvp->input_to_index,
 559                                    NULL, /* inputSlotToAttr */
 560                                    NULL, /* input semantic name */
 561                                    NULL, /* input semantic index */
 562                                    NULL, /* interp mode */
 563                                    /* outputs */
 564                                    num_outputs,
 565                                    stvp->result_to_output,
 566                                    output_semantic_name,
 567                                    output_semantic_index);
 568
 569       st_translate_stream_output_info(&stp->Base);
 570
 571       free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
 572    } else
 573       error = st_translate_mesa_program(st->ctx,
 574                                         PIPE_SHADER_VERTEX,
 575                                         ureg,
 576                                         &stp->Base,
 577                                         /* inputs */
 578                                         stvp->num_inputs,
 579                                         stvp->input_to_index,
 580                                         NULL, /* input semantic name */
 581                                         NULL, /* input semantic index */
 582                                         NULL,
 583                                         /* outputs */
 584                                         num_outputs,
 585                                         stvp->result_to_output,
 586                                         output_semantic_name,
 587                                         output_semantic_index);
 588
 589    if (error) {
 590       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 591       _mesa_print_program(&stp->Base);
 592       debug_assert(0);
 593       return false;
 594    }
 595
 596    stp->state.tokens = ureg_get_tokens(ureg, NULL);
 597    ureg_destroy(ureg);
 598
 599    if (stp->glsl_to_tgsi) {
 600       stp->glsl_to_tgsi = NULL;
 601       st_store_ir_in_disk_cache(st, &stp->Base, false);
 602    }
 603
 604    return stp->state.tokens != NULL;
 605 }
 606
 607 static const gl_state_index16 depth_range_state[STATE_LENGTH] =
 608    { STATE_DEPTH_RANGE };
 609
 610 static struct st_common_variant *
 611 st_create_vp_variant(struct st_context *st,
 612                      struct st_program *stvp,
 613                      const struct st_common_variant_key *key)
 614 {
 615    struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
 616    struct pipe_context *pipe = st->pipe;
 617    struct pipe_screen *screen = pipe->screen;
 618    struct pipe_shader_state state = {0};
 619
 620    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 621       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 622    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 623
 624    vpv->key = *key;
 625
 626    state.stream_output = stvp->state.stream_output;
 627
 628    if (stvp->state.type == PIPE_SHADER_IR_NIR &&
 629        (!key->is_draw_shader || draw_has_llvm())) {
 630       bool finalize = false;
 631
 632       state.type = PIPE_SHADER_IR_NIR;
 633       state.ir.nir = nir_shader_clone(NULL, stvp->Base.nir);
 634       if (key->clamp_color) {
 635          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 636          finalize = true;
 637       }
 638       if (key->passthrough_edgeflags) {
 639          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 640          finalize = true;
 641       }
 642
 643       if (key->lower_point_size) {
 644          _mesa_add_state_reference(params, point_size_state);
 645          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 646                     point_size_state);
 647          finalize = true;
 648       }
 649
 650       if (key->lower_ucp) {
 651          bool can_compact = screen->get_param(screen,
 652                                               PIPE_CAP_NIR_COMPACT_ARRAYS);
 653
 654          bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 655          gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 656          for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 657             if (use_eye) {
 658                clipplane_state[i][0] = STATE_CLIPPLANE;
 659                clipplane_state[i][1] = i;
 660             } else {
 661                clipplane_state[i][0] = STATE_INTERNAL;
 662                clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 663                clipplane_state[i][2] = i;
 664             }
 665             _mesa_add_state_reference(params, clipplane_state[i]);
 666          }
 667
 668          NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
 669                     true, can_compact, clipplane_state);
 670          NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
 671                     nir_shader_get_entrypoint(state.ir.nir), true, false);
 672          NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
 673          finalize = true;
 674       }
 675
 676       if (finalize || !st->allow_st_finalize_nir_twice) {
 677          st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
 678                          true);
 679
 680          /* Some of the lowering above may have introduced new varyings */
 681          nir_shader_gather_info(state.ir.nir,
 682                                 nir_shader_get_entrypoint(state.ir.nir));
 683       }
 684
 685       if (ST_DEBUG & DEBUG_PRINT_IR)
 686          nir_print_shader(state.ir.nir, stderr);
 687
 688       if (key->is_draw_shader)
 689          vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 690       else
 691          vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 692
 693       return vpv;
 694    }
 695
 696    state.type = PIPE_SHADER_IR_TGSI;
 697    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 698
 699    /* Emulate features. */
 700    if (key->clamp_color || key->passthrough_edgeflags) {
 701       const struct tgsi_token *tokens;
 702       unsigned flags =
 703          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 704          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 705
 706       tokens = tgsi_emulate(state.tokens, flags);
 707
 708       if (tokens) {
 709          tgsi_free_tokens(state.tokens);
 710          state.tokens = tokens;
 711       } else {
 712          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 713       }
 714    }
 715
 716    if (key->lower_depth_clamp) {
 717       unsigned depth_range_const =
 718             _mesa_add_state_reference(params, depth_range_state);
 719
 720       const struct tgsi_token *tokens;
 721       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 722                                          key->clip_negative_one_to_one);
 723       if (tokens != state.tokens)
 724          tgsi_free_tokens(state.tokens);
 725       state.tokens = tokens;
 726    }
 727
 728    if (ST_DEBUG & DEBUG_PRINT_IR)
 729       tgsi_dump(state.tokens, 0);
 730
 731    if (key->is_draw_shader)
 732       vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 733    else
 734       vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 735
 736    if (state.tokens) {
 737       tgsi_free_tokens(state.tokens);
 738    }
 739
 740    return vpv;
 741 }
 742
 743
 744 /**
 745  * Find/create a vertex program variant.
 746  */
 747 struct st_common_variant *
 748 st_get_vp_variant(struct st_context *st,
 749                   struct st_program *stp,
 750                   const struct st_common_variant_key *key)
 751 {
 752    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 753    struct st_common_variant *vpv;
 754
 755    /* Search for existing variant */
 756    for (vpv = st_common_variant(stp->variants); vpv;
 757         vpv = st_common_variant(vpv->base.next)) {
 758       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 759          break;
 760       }
 761    }
 762
 763    if (!vpv) {
 764       /* create now */
 765       vpv = st_create_vp_variant(st, stp, key);
 766       if (vpv) {
 767          vpv->base.st = key->st;
 768
 769          unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
 770          for (unsigned index = 0; index < num_inputs; ++index) {
 771             unsigned attr = stvp->index_to_input[index];
 772             if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 773                continue;
 774             vpv->vert_attrib_mask |= 1u << attr;
 775          }
 776
 777          /* insert into list */
 778          vpv->base.next = stp->variants;
 779          stp->variants = &vpv->base;
 780       }
 781    }
 782
 783    return vpv;
 784 }
 785
 786
 787 /**
 788  * Translate a Mesa fragment shader into a TGSI shader.
 789  */
 790 bool
 791 st_translate_fragment_program(struct st_context *st,
 792                               struct st_program *stfp)
 793 {
 794    /* Non-GLSL programs: */
 795    if (!stfp->glsl_to_tgsi) {
 796       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 797       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 798          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 799
 800       /* This determines which states will be updated when the assembly
 801        * shader is bound.
 802        *
 803        * fragment.position and glDrawPixels always use constants.
 804        */
 805       stfp->affected_states = ST_NEW_FS_STATE |
 806                               ST_NEW_SAMPLE_SHADING |
 807                               ST_NEW_FS_CONSTANTS;
 808
 809       if (stfp->ati_fs) {
 810          /* Just set them for ATI_fs unconditionally. */
 811          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 812                                   ST_NEW_FS_SAMPLERS;
 813       } else {
 814          /* ARB_fp */
 815          if (stfp->Base.SamplersUsed)
 816             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 817                                      ST_NEW_FS_SAMPLERS;
 818       }
 819
 820       /* Translate to NIR. */
 821       if (!stfp->ati_fs &&
 822           st->pipe->screen->get_shader_param(st->pipe->screen,
 823                                              PIPE_SHADER_FRAGMENT,
 824                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 825          nir_shader *nir =
 826             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 827
 828          if (stfp->Base.nir)
 829             ralloc_free(stfp->Base.nir);
 830          stfp->state.type = PIPE_SHADER_IR_NIR;
 831          stfp->Base.nir = nir;
 832          return true;
 833       }
 834    }
 835
 836    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 837    ubyte inputMapping[VARYING_SLOT_MAX];
 838    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 839    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 840    GLuint attr;
 841    GLbitfield64 inputsRead;
 842    struct ureg_program *ureg;
 843
 844    GLboolean write_all = GL_FALSE;
 845
 846    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 847    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 848    uint fs_num_inputs = 0;
 849
 850    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 851    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 852    uint fs_num_outputs = 0;
 853
 854    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 855
 856    /*
 857     * Convert Mesa program inputs to TGSI input register semantics.
 858     */
 859    inputsRead = stfp->Base.info.inputs_read;
 860    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 861       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 862          const GLuint slot = fs_num_inputs++;
 863
 864          inputMapping[attr] = slot;
 865          inputSlotToAttr[slot] = attr;
 866
 867          switch (attr) {
 868          case VARYING_SLOT_POS:
 869             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 870             input_semantic_index[slot] = 0;
 871             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 872             break;
 873          case VARYING_SLOT_COL0:
 874             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 875             input_semantic_index[slot] = 0;
 876             interpMode[slot] = stfp->glsl_to_tgsi ?
 877                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 878             break;
 879          case VARYING_SLOT_COL1:
 880             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 881             input_semantic_index[slot] = 1;
 882             interpMode[slot] = stfp->glsl_to_tgsi ?
 883                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 884             break;
 885          case VARYING_SLOT_FOGC:
 886             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 887             input_semantic_index[slot] = 0;
 888             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 889             break;
 890          case VARYING_SLOT_FACE:
 891             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 892             input_semantic_index[slot] = 0;
 893             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 894             break;
 895          case VARYING_SLOT_PRIMITIVE_ID:
 896             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 897             input_semantic_index[slot] = 0;
 898             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 899             break;
 900          case VARYING_SLOT_LAYER:
 901             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 902             input_semantic_index[slot] = 0;
 903             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 904             break;
 905          case VARYING_SLOT_VIEWPORT:
 906             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 907             input_semantic_index[slot] = 0;
 908             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 909             break;
 910          case VARYING_SLOT_CLIP_DIST0:
 911             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 912             input_semantic_index[slot] = 0;
 913             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 914             break;
 915          case VARYING_SLOT_CLIP_DIST1:
 916             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 917             input_semantic_index[slot] = 1;
 918             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 919             break;
 920          case VARYING_SLOT_CULL_DIST0:
 921          case VARYING_SLOT_CULL_DIST1:
 922             /* these should have been lowered by GLSL */
 923             assert(0);
 924             break;
 925             /* In most cases, there is nothing special about these
 926              * inputs, so adopt a convention to use the generic
 927              * semantic name and the mesa VARYING_SLOT_ number as the
 928              * index.
 929              *
 930              * All that is required is that the vertex shader labels
 931              * its own outputs similarly, and that the vertex shader
 932              * generates at least every output required by the
 933              * fragment shader plus fixed-function hardware (such as
 934              * BFC).
 935              *
 936              * However, some drivers may need us to identify the PNTC and TEXi
 937              * varyings if, for example, their capability to replace them with
 938              * sprite coordinates is limited.
 939              */
 940          case VARYING_SLOT_PNTC:
 941             if (st->needs_texcoord_semantic) {
 942                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
 943                input_semantic_index[slot] = 0;
 944                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 945                break;
 946             }
 947             /* fall through */
 948          case VARYING_SLOT_TEX0:
 949          case VARYING_SLOT_TEX1:
 950          case VARYING_SLOT_TEX2:
 951          case VARYING_SLOT_TEX3:
 952          case VARYING_SLOT_TEX4:
 953          case VARYING_SLOT_TEX5:
 954          case VARYING_SLOT_TEX6:
 955          case VARYING_SLOT_TEX7:
 956             if (st->needs_texcoord_semantic) {
 957                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
 958                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
 959                interpMode[slot] = stfp->glsl_to_tgsi ?
 960                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 961                break;
 962             }
 963             /* fall through */
 964          case VARYING_SLOT_VAR0:
 965          default:
 966             /* Semantic indices should be zero-based because drivers may choose
 967              * to assign a fixed slot determined by that index.
 968              * This is useful because ARB_separate_shader_objects uses location
 969              * qualifiers for linkage, and if the semantic index corresponds to
 970              * these locations, linkage passes in the driver become unecessary.
 971              *
 972              * If needs_texcoord_semantic is true, no semantic indices will be
 973              * consumed for the TEXi varyings, and we can base the locations of
 974              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
 975              */
 976             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
 977                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
 978             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
 979             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
 980             if (attr == VARYING_SLOT_PNTC)
 981                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 982             else {
 983                interpMode[slot] = stfp->glsl_to_tgsi ?
 984                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 985             }
 986             break;
 987          }
 988       }
 989       else {
 990          inputMapping[attr] = -1;
 991       }
 992    }
 993
 994    /*
 995     * Semantics and mapping for outputs
 996     */
 997    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
 998
 999    /* if z is written, emit that first */
1000    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
1001       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
1002       fs_output_semantic_index[fs_num_outputs] = 0;
1003       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
1004       fs_num_outputs++;
1005       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
1006    }
1007
1008    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
1009       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
1010       fs_output_semantic_index[fs_num_outputs] = 0;
1011       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
1012       fs_num_outputs++;
1013       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
1014    }
1015
1016    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
1017       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
1018       fs_output_semantic_index[fs_num_outputs] = 0;
1019       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
1020       fs_num_outputs++;
1021       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
1022    }
1023
1024    /* handle remaining outputs (color) */
1025    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
1026       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
1027          stfp->Base.SecondaryOutputsWritten;
1028       const unsigned loc = attr % FRAG_RESULT_MAX;
1029
1030       if (written & BITFIELD64_BIT(loc)) {
1031          switch (loc) {
1032          case FRAG_RESULT_DEPTH:
1033          case FRAG_RESULT_STENCIL:
1034          case FRAG_RESULT_SAMPLE_MASK:
1035             /* handled above */
1036             assert(0);
1037             break;
1038          case FRAG_RESULT_COLOR:
1039             write_all = GL_TRUE; /* fallthrough */
1040          default: {
1041             int index;
1042             assert(loc == FRAG_RESULT_COLOR ||
1043                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1044
1045             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1046
1047             if (attr >= FRAG_RESULT_MAX) {
1048                /* Secondary color for dual source blending. */
1049                assert(index == 0);
1050                index++;
1051             }
1052
1053             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1054             fs_output_semantic_index[fs_num_outputs] = index;
1055             outputMapping[attr] = fs_num_outputs;
1056             break;
1057          }
1058          }
1059
1060          fs_num_outputs++;
1061       }
1062    }
1063
1064    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1065    if (ureg == NULL)
1066       return false;
1067
1068    if (ST_DEBUG & DEBUG_MESA) {
1069       _mesa_print_program(&stfp->Base);
1070       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1071       debug_printf("\n");
1072    }
1073    if (write_all == GL_TRUE)
1074       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1075
1076    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1077       switch (stfp->Base.info.fs.depth_layout) {
1078       case FRAG_DEPTH_LAYOUT_ANY:
1079          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1080                        TGSI_FS_DEPTH_LAYOUT_ANY);
1081          break;
1082       case FRAG_DEPTH_LAYOUT_GREATER:
1083          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1084                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1085          break;
1086       case FRAG_DEPTH_LAYOUT_LESS:
1087          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1088                        TGSI_FS_DEPTH_LAYOUT_LESS);
1089          break;
1090       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1091          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1092                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1093          break;
1094       default:
1095          assert(0);
1096       }
1097    }
1098
1099    if (stfp->glsl_to_tgsi) {
1100       st_translate_program(st->ctx,
1101                            PIPE_SHADER_FRAGMENT,
1102                            ureg,
1103                            stfp->glsl_to_tgsi,
1104                            &stfp->Base,
1105                            /* inputs */
1106                            fs_num_inputs,
1107                            inputMapping,
1108                            inputSlotToAttr,
1109                            input_semantic_name,
1110                            input_semantic_index,
1111                            interpMode,
1112                            /* outputs */
1113                            fs_num_outputs,
1114                            outputMapping,
1115                            fs_output_semantic_name,
1116                            fs_output_semantic_index);
1117
1118       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1119    } else if (stfp->ati_fs)
1120       st_translate_atifs_program(ureg,
1121                                  stfp->ati_fs,
1122                                  &stfp->Base,
1123                                  /* inputs */
1124                                  fs_num_inputs,
1125                                  inputMapping,
1126                                  input_semantic_name,
1127                                  input_semantic_index,
1128                                  interpMode,
1129                                  /* outputs */
1130                                  fs_num_outputs,
1131                                  outputMapping,
1132                                  fs_output_semantic_name,
1133                                  fs_output_semantic_index);
1134    else
1135       st_translate_mesa_program(st->ctx,
1136                                 PIPE_SHADER_FRAGMENT,
1137                                 ureg,
1138                                 &stfp->Base,
1139                                 /* inputs */
1140                                 fs_num_inputs,
1141                                 inputMapping,
1142                                 input_semantic_name,
1143                                 input_semantic_index,
1144                                 interpMode,
1145                                 /* outputs */
1146                                 fs_num_outputs,
1147                                 outputMapping,
1148                                 fs_output_semantic_name,
1149                                 fs_output_semantic_index);
1150
1151    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1152    ureg_destroy(ureg);
1153
1154    if (stfp->glsl_to_tgsi) {
1155       stfp->glsl_to_tgsi = NULL;
1156       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1157    }
1158
1159    return stfp->state.tokens != NULL;
1160 }
1161
1162 static struct st_fp_variant *
1163 st_create_fp_variant(struct st_context *st,
1164                      struct st_program *stfp,
1165                      const struct st_fp_variant_key *key)
1166 {
1167    struct pipe_context *pipe = st->pipe;
1168    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1169    struct pipe_shader_state state = {0};
1170    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1171    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1172       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1173    static const gl_state_index16 scale_state[STATE_LENGTH] =
1174       { STATE_INTERNAL, STATE_PT_SCALE };
1175    static const gl_state_index16 bias_state[STATE_LENGTH] =
1176       { STATE_INTERNAL, STATE_PT_BIAS };
1177    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1178       { STATE_INTERNAL, STATE_ALPHA_REF };
1179
1180    if (!variant)
1181       return NULL;
1182
1183    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1184       bool finalize = false;
1185
1186       state.type = PIPE_SHADER_IR_NIR;
1187       state.ir.nir = nir_shader_clone(NULL, stfp->Base.nir);
1188
1189       if (key->clamp_color) {
1190          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1191          finalize = true;
1192       }
1193
1194       if (key->lower_flatshade) {
1195          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1196          finalize = true;
1197       }
1198
1199       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1200          _mesa_add_state_reference(params, alpha_ref_state);
1201          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1202                     false, alpha_ref_state);
1203          finalize = true;
1204       }
1205
1206       if (key->lower_two_sided_color) {
1207          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
1208          finalize = true;
1209       }
1210
1211       if (key->persample_shading) {
1212           nir_shader *shader = state.ir.nir;
1213           nir_foreach_variable(var, &shader->inputs)
1214              var->data.sample = true;
1215           finalize = true;
1216       }
1217
1218       assert(!(key->bitmap && key->drawpixels));
1219
1220       /* glBitmap */
1221       if (key->bitmap) {
1222          nir_lower_bitmap_options options = {0};
1223
1224          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1225          options.sampler = variant->bitmap_sampler;
1226          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1227
1228          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1229          finalize = true;
1230       }
1231
1232       /* glDrawPixels (color only) */
1233       if (key->drawpixels) {
1234          nir_lower_drawpixels_options options = {{0}};
1235          unsigned samplers_used = stfp->Base.SamplersUsed;
1236
1237          /* Find the first unused slot. */
1238          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1239          options.drawpix_sampler = variant->drawpix_sampler;
1240          samplers_used |= (1 << variant->drawpix_sampler);
1241
1242          options.pixel_maps = key->pixelMaps;
1243          if (key->pixelMaps) {
1244             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1245             options.pixelmap_sampler = variant->pixelmap_sampler;
1246          }
1247
1248          options.scale_and_bias = key->scaleAndBias;
1249          if (key->scaleAndBias) {
1250             _mesa_add_state_reference(params, scale_state);
1251             memcpy(options.scale_state_tokens, scale_state,
1252                    sizeof(options.scale_state_tokens));
1253             _mesa_add_state_reference(params, bias_state);
1254             memcpy(options.bias_state_tokens, bias_state,
1255                    sizeof(options.bias_state_tokens));
1256          }
1257
1258          _mesa_add_state_reference(params, texcoord_state);
1259          memcpy(options.texcoord_state_tokens, texcoord_state,
1260                 sizeof(options.texcoord_state_tokens));
1261
1262          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1263          finalize = true;
1264       }
1265
1266       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1267                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1268                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1269
1270          st_nir_lower_samplers(pipe->screen, state.ir.nir,
1271                                stfp->shader_program, &stfp->Base);
1272
1273          nir_lower_tex_options options = {0};
1274          options.lower_y_uv_external = key->external.lower_nv12;
1275          options.lower_y_u_v_external = key->external.lower_iyuv;
1276          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1277          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1278          options.lower_ayuv_external = key->external.lower_ayuv;
1279          options.lower_xyuv_external = key->external.lower_xyuv;
1280          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1281          finalize = true;
1282       }
1283
1284       if (finalize || !st->allow_st_finalize_nir_twice) {
1285          st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
1286                          false);
1287       }
1288
1289       /* This pass needs to happen *after* nir_lower_sampler */
1290       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1291                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1292                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1293          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1294                     ~stfp->Base.SamplersUsed,
1295                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1296                        key->external.lower_yx_xuxv,
1297                     key->external.lower_iyuv);
1298          finalize = true;
1299       }
1300
1301       if (finalize || !st->allow_st_finalize_nir_twice) {
1302          /* Some of the lowering above may have introduced new varyings */
1303          nir_shader_gather_info(state.ir.nir,
1304                                 nir_shader_get_entrypoint(state.ir.nir));
1305
1306          struct pipe_screen *screen = pipe->screen;
1307          if (screen->finalize_nir)
1308             screen->finalize_nir(screen, state.ir.nir, false);
1309       }
1310
1311       if (ST_DEBUG & DEBUG_PRINT_IR)
1312          nir_print_shader(state.ir.nir, stderr);
1313
1314       variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1315       variant->key = *key;
1316
1317       return variant;
1318    }
1319
1320    state.tokens = stfp->state.tokens;
1321
1322    assert(!(key->bitmap && key->drawpixels));
1323
1324    /* Fix texture targets and add fog for ATI_fs */
1325    if (stfp->ati_fs) {
1326       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1327
1328       if (tokens)
1329          state.tokens = tokens;
1330       else
1331          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1332    }
1333
1334    /* Emulate features. */
1335    if (key->clamp_color || key->persample_shading) {
1336       const struct tgsi_token *tokens;
1337       unsigned flags =
1338          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1339          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1340
1341       tokens = tgsi_emulate(state.tokens, flags);
1342
1343       if (tokens) {
1344          if (state.tokens != stfp->state.tokens)
1345             tgsi_free_tokens(state.tokens);
1346          state.tokens = tokens;
1347       } else
1348          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1349    }
1350
1351    /* glBitmap */
1352    if (key->bitmap) {
1353       const struct tgsi_token *tokens;
1354
1355       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1356
1357       tokens = st_get_bitmap_shader(state.tokens,
1358                                     st->internal_target,
1359                                     variant->bitmap_sampler,
1360                                     st->needs_texcoord_semantic,
1361                                     st->bitmap.tex_format ==
1362                                     PIPE_FORMAT_R8_UNORM);
1363
1364       if (tokens) {
1365          if (state.tokens != stfp->state.tokens)
1366             tgsi_free_tokens(state.tokens);
1367          state.tokens = tokens;
1368       } else
1369          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1370    }
1371
1372    /* glDrawPixels (color only) */
1373    if (key->drawpixels) {
1374       const struct tgsi_token *tokens;
1375       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1376
1377       /* Find the first unused slot. */
1378       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1379
1380       if (key->pixelMaps) {
1381          unsigned samplers_used = stfp->Base.SamplersUsed |
1382                                   (1 << variant->drawpix_sampler);
1383
1384          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1385       }
1386
1387       if (key->scaleAndBias) {
1388          scale_const = _mesa_add_state_reference(params, scale_state);
1389          bias_const = _mesa_add_state_reference(params, bias_state);
1390       }
1391
1392       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1393
1394       tokens = st_get_drawpix_shader(state.tokens,
1395                                      st->needs_texcoord_semantic,
1396                                      key->scaleAndBias, scale_const,
1397                                      bias_const, key->pixelMaps,
1398                                      variant->drawpix_sampler,
1399                                      variant->pixelmap_sampler,
1400                                      texcoord_const, st->internal_target);
1401
1402       if (tokens) {
1403          if (state.tokens != stfp->state.tokens)
1404             tgsi_free_tokens(state.tokens);
1405          state.tokens = tokens;
1406       } else
1407          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1408    }
1409
1410    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1411                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1412       const struct tgsi_token *tokens;
1413
1414       /* samplers inserted would conflict, but this should be unpossible: */
1415       assert(!(key->bitmap || key->drawpixels));
1416
1417       tokens = st_tgsi_lower_yuv(state.tokens,
1418                                  ~stfp->Base.SamplersUsed,
1419                                  key->external.lower_nv12 ||
1420                                     key->external.lower_xy_uxvx ||
1421                                     key->external.lower_yx_xuxv,
1422                                  key->external.lower_iyuv);
1423       if (tokens) {
1424          if (state.tokens != stfp->state.tokens)
1425             tgsi_free_tokens(state.tokens);
1426          state.tokens = tokens;
1427       } else {
1428          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1429       }
1430    }
1431
1432    if (key->lower_depth_clamp) {
1433       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1434
1435       const struct tgsi_token *tokens;
1436       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1437       if (state.tokens != stfp->state.tokens)
1438          tgsi_free_tokens(state.tokens);
1439       state.tokens = tokens;
1440    }
1441
1442    if (ST_DEBUG & DEBUG_PRINT_IR)
1443       tgsi_dump(state.tokens, 0);
1444
1445    /* fill in variant */
1446    variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1447    variant->key = *key;
1448
1449    if (state.tokens != stfp->state.tokens)
1450       tgsi_free_tokens(state.tokens);
1451    return variant;
1452 }
1453
1454 /**
1455  * Translate fragment program if needed.
1456  */
1457 struct st_fp_variant *
1458 st_get_fp_variant(struct st_context *st,
1459                   struct st_program *stfp,
1460                   const struct st_fp_variant_key *key)
1461 {
1462    struct st_fp_variant *fpv;
1463
1464    /* Search for existing variant */
1465    for (fpv = st_fp_variant(stfp->variants); fpv;
1466         fpv = st_fp_variant(fpv->base.next)) {
1467       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1468          break;
1469       }
1470    }
1471
1472    if (!fpv) {
1473       /* create new */
1474       fpv = st_create_fp_variant(st, stfp, key);
1475       if (fpv) {
1476          fpv->base.st = key->st;
1477
1478          if (key->bitmap || key->drawpixels) {
1479             /* Regular variants should always come before the
1480              * bitmap & drawpixels variants, (unless there
1481              * are no regular variants) so that
1482              * st_update_fp can take a fast path when
1483              * shader_has_one_variant is set.
1484              */
1485             if (!stfp->variants) {
1486                stfp->variants = &fpv->base;
1487             } else {
1488                /* insert into list after the first one */
1489                fpv->base.next = stfp->variants->next;
1490                stfp->variants->next = &fpv->base;
1491             }
1492          } else {
1493             /* insert into list */
1494             fpv->base.next = stfp->variants;
1495             stfp->variants = &fpv->base;
1496          }
1497       }
1498    }
1499
1500    return fpv;
1501 }
1502
1503 /**
1504  * Translate a program. This is common code for geometry and tessellation
1505  * shaders.
1506  */
1507 bool
1508 st_translate_common_program(struct st_context *st,
1509                             struct st_program *stp)
1510 {
1511    struct gl_program *prog = &stp->Base;
1512    enum pipe_shader_type stage =
1513       pipe_shader_type_from_mesa(stp->Base.info.stage);
1514    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1515
1516    if (ureg == NULL)
1517       return false;
1518
1519    switch (stage) {
1520    case PIPE_SHADER_TESS_CTRL:
1521       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1522                     stp->Base.info.tess.tcs_vertices_out);
1523       break;
1524
1525    case PIPE_SHADER_TESS_EVAL:
1526       if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
1527          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1528       else
1529          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1530                        stp->Base.info.tess.primitive_mode);
1531
1532       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1533       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1534                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1535       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1536                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1537
1538       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1539                     (stp->Base.info.tess.spacing + 1) % 3);
1540
1541       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1542                     !stp->Base.info.tess.ccw);
1543       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1544                     stp->Base.info.tess.point_mode);
1545       break;
1546
1547    case PIPE_SHADER_GEOMETRY:
1548       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1549                     stp->Base.info.gs.input_primitive);
1550       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1551                     stp->Base.info.gs.output_primitive);
1552       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1553                     stp->Base.info.gs.vertices_out);
1554       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1555                     stp->Base.info.gs.invocations);
1556       break;
1557
1558    default:
1559       break;
1560    }
1561
1562    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1563    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1564    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1565    GLuint attr;
1566
1567    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1568    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1569    uint num_inputs = 0;
1570
1571    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1572    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1573    uint num_outputs = 0;
1574
1575    GLint i;
1576
1577    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1578    memset(inputMapping, 0, sizeof(inputMapping));
1579    memset(outputMapping, 0, sizeof(outputMapping));
1580    memset(&stp->state, 0, sizeof(stp->state));
1581
1582    if (prog->info.clip_distance_array_size)
1583       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1584                     prog->info.clip_distance_array_size);
1585    if (prog->info.cull_distance_array_size)
1586       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1587                     prog->info.cull_distance_array_size);
1588
1589    /*
1590     * Convert Mesa program inputs to TGSI input register semantics.
1591     */
1592    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1593       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1594          continue;
1595
1596       unsigned slot = num_inputs++;
1597
1598       inputMapping[attr] = slot;
1599       inputSlotToAttr[slot] = attr;
1600
1601       unsigned semantic_name, semantic_index;
1602       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1603                                    &semantic_name, &semantic_index);
1604       input_semantic_name[slot] = semantic_name;
1605       input_semantic_index[slot] = semantic_index;
1606    }
1607
1608    /* Also add patch inputs. */
1609    for (attr = 0; attr < 32; attr++) {
1610       if (prog->info.patch_inputs_read & (1u << attr)) {
1611          GLuint slot = num_inputs++;
1612          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1613
1614          inputMapping[patch_attr] = slot;
1615          inputSlotToAttr[slot] = patch_attr;
1616          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1617          input_semantic_index[slot] = attr;
1618       }
1619    }
1620
1621    /* initialize output semantics to defaults */
1622    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1623       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1624       output_semantic_index[i] = 0;
1625    }
1626
1627    /*
1628     * Determine number of outputs, the (default) output register
1629     * mapping and the semantic information for each output.
1630     */
1631    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1632       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1633          GLuint slot = num_outputs++;
1634
1635          outputMapping[attr] = slot;
1636
1637          unsigned semantic_name, semantic_index;
1638          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1639                                       &semantic_name, &semantic_index);
1640          output_semantic_name[slot] = semantic_name;
1641          output_semantic_index[slot] = semantic_index;
1642       }
1643    }
1644
1645    /* Also add patch outputs. */
1646    for (attr = 0; attr < 32; attr++) {
1647       if (prog->info.patch_outputs_written & (1u << attr)) {
1648          GLuint slot = num_outputs++;
1649          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1650
1651          outputMapping[patch_attr] = slot;
1652          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1653          output_semantic_index[slot] = attr;
1654       }
1655    }
1656
1657    st_translate_program(st->ctx,
1658                         stage,
1659                         ureg,
1660                         stp->glsl_to_tgsi,
1661                         prog,
1662                         /* inputs */
1663                         num_inputs,
1664                         inputMapping,
1665                         inputSlotToAttr,
1666                         input_semantic_name,
1667                         input_semantic_index,
1668                         NULL,
1669                         /* outputs */
1670                         num_outputs,
1671                         outputMapping,
1672                         output_semantic_name,
1673                         output_semantic_index);
1674
1675    stp->state.tokens = ureg_get_tokens(ureg, NULL);
1676
1677    ureg_destroy(ureg);
1678
1679    st_translate_stream_output_info(prog);
1680
1681    st_store_ir_in_disk_cache(st, prog, false);
1682
1683    if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1684       _mesa_print_program(prog);
1685
1686    free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1687    stp->glsl_to_tgsi = NULL;
1688    return true;
1689 }
1690
1691
1692 /**
1693  * Get/create a basic program variant.
1694  */
1695 struct st_variant *
1696 st_get_common_variant(struct st_context *st,
1697                       struct st_program *prog,
1698                       const struct st_common_variant_key *key)
1699 {
1700    struct pipe_context *pipe = st->pipe;
1701    struct st_variant *v;
1702    struct pipe_shader_state state = {0};
1703
1704    /* Search for existing variant */
1705    for (v = prog->variants; v; v = v->next) {
1706       if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
1707          break;
1708    }
1709
1710    if (!v) {
1711       /* create new */
1712       v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
1713       if (v) {
1714          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1715             bool finalize = false;
1716
1717             state.type = PIPE_SHADER_IR_NIR;
1718             state.ir.nir = nir_shader_clone(NULL, prog->Base.nir);
1719
1720             if (key->clamp_color) {
1721                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1722                finalize = true;
1723             }
1724
1725             state.stream_output = prog->state.stream_output;
1726
1727             if (finalize || !st->allow_st_finalize_nir_twice) {
1728                st_finalize_nir(st, &prog->Base, prog->shader_program,
1729                                state.ir.nir, true);
1730             }
1731
1732             if (ST_DEBUG & DEBUG_PRINT_IR)
1733                nir_print_shader(state.ir.nir, stderr);
1734          } else {
1735             if (key->lower_depth_clamp) {
1736                struct gl_program_parameter_list *params = prog->Base.Parameters;
1737
1738                unsigned depth_range_const =
1739                      _mesa_add_state_reference(params, depth_range_state);
1740
1741                const struct tgsi_token *tokens;
1742                tokens =
1743                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1744                                                depth_range_const,
1745                                                key->clip_negative_one_to_one);
1746
1747                if (tokens != prog->state.tokens)
1748                   tgsi_free_tokens(prog->state.tokens);
1749
1750                prog->state.tokens = tokens;
1751             }
1752             state = prog->state;
1753
1754             if (ST_DEBUG & DEBUG_PRINT_IR)
1755                tgsi_dump(state.tokens, 0);
1756          }
1757          /* fill in new variant */
1758          switch (prog->Base.info.stage) {
1759          case MESA_SHADER_TESS_CTRL:
1760             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1761             break;
1762          case MESA_SHADER_TESS_EVAL:
1763             v->driver_shader = pipe->create_tes_state(pipe, &state);
1764             break;
1765          case MESA_SHADER_GEOMETRY:
1766             v->driver_shader = pipe->create_gs_state(pipe, &state);
1767             break;
1768          case MESA_SHADER_COMPUTE: {
1769             struct pipe_compute_state cs = {0};
1770             cs.ir_type = state.type;
1771             cs.req_local_mem = prog->Base.info.cs.shared_size;
1772
1773             if (state.type == PIPE_SHADER_IR_NIR)
1774                cs.prog = state.ir.nir;
1775             else
1776                cs.prog = state.tokens;
1777
1778             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1779             break;
1780          }
1781          default:
1782             assert(!"unhandled shader type");
1783             free(v);
1784             return NULL;
1785          }
1786
1787          st_common_variant(v)->key = *key;
1788          v->st = key->st;
1789
1790          /* insert into list */
1791          v->next = prog->variants;
1792          prog->variants = v;
1793       }
1794    }
1795
1796    return v;
1797 }
1798
1799
1800 /**
1801  * Vert/Geom/Frag programs have per-context variants.  Free all the
1802  * variants attached to the given program which match the given context.
1803  */
1804 static void
1805 destroy_program_variants(struct st_context *st, struct gl_program *target)
1806 {
1807    if (!target || target == &_mesa_DummyProgram)
1808       return;
1809
1810    struct st_program *p = st_program(target);
1811    struct st_variant *v, **prevPtr = &p->variants;
1812    bool unbound = false;
1813
1814    for (v = p->variants; v; ) {
1815       struct st_variant *next = v->next;
1816       if (v->st == st) {
1817          if (!unbound) {
1818             st_unbind_program(st, p);
1819             unbound = true;
1820          }
1821
1822          /* unlink from list */
1823          *prevPtr = next;
1824          /* destroy this variant */
1825          delete_variant(st, v, target->Target);
1826       }
1827       else {
1828          prevPtr = &v->next;
1829       }
1830       v = next;
1831    }
1832 }
1833
1834
1835 /**
1836  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1837  * which match the given context.
1838  */
1839 static void
1840 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1841 {
1842    struct st_context *st = (struct st_context *) userData;
1843    struct gl_shader *shader = (struct gl_shader *) data;
1844
1845    switch (shader->Type) {
1846    case GL_SHADER_PROGRAM_MESA:
1847       {
1848          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1849          GLuint i;
1850
1851          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1852             if (shProg->_LinkedShaders[i])
1853                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1854          }
1855       }
1856       break;
1857    case GL_VERTEX_SHADER:
1858    case GL_FRAGMENT_SHADER:
1859    case GL_GEOMETRY_SHADER:
1860    case GL_TESS_CONTROL_SHADER:
1861    case GL_TESS_EVALUATION_SHADER:
1862    case GL_COMPUTE_SHADER:
1863       break;
1864    default:
1865       assert(0);
1866    }
1867 }
1868
1869
1870 /**
1871  * Callback for _mesa_HashWalk.  Free all the program variants which match
1872  * the given context.
1873  */
1874 static void
1875 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1876 {
1877    struct st_context *st = (struct st_context *) userData;
1878    struct gl_program *program = (struct gl_program *) data;
1879    destroy_program_variants(st, program);
1880 }
1881
1882
1883 /**
1884  * Walk over all shaders and programs to delete any variants which
1885  * belong to the given context.
1886  * This is called during context tear-down.
1887  */
1888 void
1889 st_destroy_program_variants(struct st_context *st)
1890 {
1891    /* If shaders can be shared with other contexts, the last context will
1892     * call DeleteProgram on all shaders, releasing everything.
1893     */
1894    if (st->has_shareable_shaders)
1895       return;
1896
1897    /* ARB vert/frag program */
1898    _mesa_HashWalk(st->ctx->Shared->Programs,
1899                   destroy_program_variants_cb, st);
1900
1901    /* GLSL vert/frag/geom shaders */
1902    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1903                   destroy_shader_program_variants_cb, st);
1904 }
1905
1906
1907 /**
1908  * Compile one shader variant.
1909  */
1910 static void
1911 st_precompile_shader_variant(struct st_context *st,
1912                              struct gl_program *prog)
1913 {
1914    switch (prog->Target) {
1915    case GL_VERTEX_PROGRAM_ARB: {
1916       struct st_program *p = (struct st_program *)prog;
1917       struct st_common_variant_key key;
1918
1919       memset(&key, 0, sizeof(key));
1920
1921       key.st = st->has_shareable_shaders ? NULL : st;
1922       st_get_vp_variant(st, p, &key);
1923       break;
1924    }
1925
1926    case GL_FRAGMENT_PROGRAM_ARB: {
1927       struct st_program *p = (struct st_program *)prog;
1928       struct st_fp_variant_key key;
1929
1930       memset(&key, 0, sizeof(key));
1931
1932       key.st = st->has_shareable_shaders ? NULL : st;
1933       st_get_fp_variant(st, p, &key);
1934       break;
1935    }
1936
1937    case GL_TESS_CONTROL_PROGRAM_NV:
1938    case GL_TESS_EVALUATION_PROGRAM_NV:
1939    case GL_GEOMETRY_PROGRAM_NV:
1940    case GL_COMPUTE_PROGRAM_NV: {
1941       struct st_program *p = st_program(prog);
1942       struct st_common_variant_key key;
1943
1944       memset(&key, 0, sizeof(key));
1945
1946       key.st = st->has_shareable_shaders ? NULL : st;
1947       st_get_common_variant(st, p, &key);
1948       break;
1949    }
1950
1951    default:
1952       assert(0);
1953    }
1954 }
1955
1956 void
1957 st_finalize_program(struct st_context *st, struct gl_program *prog)
1958 {
1959    if (st->current_program[prog->info.stage] == prog) {
1960       if (prog->info.stage == MESA_SHADER_VERTEX)
1961          st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
1962       else
1963          st->dirty |= ((struct st_program *)prog)->affected_states;
1964    }
1965
1966    if (prog->nir)
1967       nir_sweep(prog->nir);
1968
1969    /* Create Gallium shaders now instead of on demand. */
1970    if (ST_DEBUG & DEBUG_PRECOMPILE ||
1971        st->shader_has_one_variant[prog->info.stage])
1972       st_precompile_shader_variant(st, prog);
1973 }