src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35 #include "main/imports.h"
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44 #include "draw/draw_context.h"
  45
  46 #include "pipe/p_context.h"
  47 #include "pipe/p_defines.h"
  48 #include "pipe/p_shader_tokens.h"
  49 #include "draw/draw_context.h"
  50 #include "tgsi/tgsi_dump.h"
  51 #include "tgsi/tgsi_emulate.h"
  52 #include "tgsi/tgsi_parse.h"
  53 #include "tgsi/tgsi_ureg.h"
  54
  55 #include "st_debug.h"
  56 #include "st_cb_bitmap.h"
  57 #include "st_cb_drawpixels.h"
  58 #include "st_context.h"
  59 #include "st_tgsi_lower_depth_clamp.h"
  60 #include "st_tgsi_lower_yuv.h"
  61 #include "st_program.h"
  62 #include "st_mesa_to_tgsi.h"
  63 #include "st_atifs_to_tgsi.h"
  64 #include "st_nir.h"
  65 #include "st_shader_cache.h"
  66 #include "st_util.h"
  67 #include "cso_cache/cso_context.h"
  68
  69
  70
  71 static void
  72 set_affected_state_flags(uint64_t *states,
  73                          struct gl_program *prog,
  74                          uint64_t new_constants,
  75                          uint64_t new_sampler_views,
  76                          uint64_t new_samplers,
  77                          uint64_t new_images,
  78                          uint64_t new_ubos,
  79                          uint64_t new_ssbos,
  80                          uint64_t new_atomics)
  81 {
  82    if (prog->Parameters->NumParameters)
  83       *states |= new_constants;
  84
  85    if (prog->info.num_textures)
  86       *states |= new_sampler_views | new_samplers;
  87
  88    if (prog->info.num_images)
  89       *states |= new_images;
  90
  91    if (prog->info.num_ubos)
  92       *states |= new_ubos;
  93
  94    if (prog->info.num_ssbos)
  95       *states |= new_ssbos;
  96
  97    if (prog->info.num_abos)
  98       *states |= new_atomics;
  99 }
 100
 101 /**
 102  * This determines which states will be updated when the shader is bound.
 103  */
 104 void
 105 st_set_prog_affected_state_flags(struct gl_program *prog)
 106 {
 107    uint64_t *states;
 108
 109    switch (prog->info.stage) {
 110    case MESA_SHADER_VERTEX:
 111       states = &((struct st_program*)prog)->affected_states;
 112
 113       *states = ST_NEW_VS_STATE |
 114                 ST_NEW_RASTERIZER |
 115                 ST_NEW_VERTEX_ARRAYS;
 116
 117       set_affected_state_flags(states, prog,
 118                                ST_NEW_VS_CONSTANTS,
 119                                ST_NEW_VS_SAMPLER_VIEWS,
 120                                ST_NEW_VS_SAMPLERS,
 121                                ST_NEW_VS_IMAGES,
 122                                ST_NEW_VS_UBOS,
 123                                ST_NEW_VS_SSBOS,
 124                                ST_NEW_VS_ATOMICS);
 125       break;
 126
 127    case MESA_SHADER_TESS_CTRL:
 128       states = &(st_program(prog))->affected_states;
 129
 130       *states = ST_NEW_TCS_STATE;
 131
 132       set_affected_state_flags(states, prog,
 133                                ST_NEW_TCS_CONSTANTS,
 134                                ST_NEW_TCS_SAMPLER_VIEWS,
 135                                ST_NEW_TCS_SAMPLERS,
 136                                ST_NEW_TCS_IMAGES,
 137                                ST_NEW_TCS_UBOS,
 138                                ST_NEW_TCS_SSBOS,
 139                                ST_NEW_TCS_ATOMICS);
 140       break;
 141
 142    case MESA_SHADER_TESS_EVAL:
 143       states = &(st_program(prog))->affected_states;
 144
 145       *states = ST_NEW_TES_STATE |
 146                 ST_NEW_RASTERIZER;
 147
 148       set_affected_state_flags(states, prog,
 149                                ST_NEW_TES_CONSTANTS,
 150                                ST_NEW_TES_SAMPLER_VIEWS,
 151                                ST_NEW_TES_SAMPLERS,
 152                                ST_NEW_TES_IMAGES,
 153                                ST_NEW_TES_UBOS,
 154                                ST_NEW_TES_SSBOS,
 155                                ST_NEW_TES_ATOMICS);
 156       break;
 157
 158    case MESA_SHADER_GEOMETRY:
 159       states = &(st_program(prog))->affected_states;
 160
 161       *states = ST_NEW_GS_STATE |
 162                 ST_NEW_RASTERIZER;
 163
 164       set_affected_state_flags(states, prog,
 165                                ST_NEW_GS_CONSTANTS,
 166                                ST_NEW_GS_SAMPLER_VIEWS,
 167                                ST_NEW_GS_SAMPLERS,
 168                                ST_NEW_GS_IMAGES,
 169                                ST_NEW_GS_UBOS,
 170                                ST_NEW_GS_SSBOS,
 171                                ST_NEW_GS_ATOMICS);
 172       break;
 173
 174    case MESA_SHADER_FRAGMENT:
 175       states = &((struct st_program*)prog)->affected_states;
 176
 177       /* gl_FragCoord and glDrawPixels always use constants. */
 178       *states = ST_NEW_FS_STATE |
 179                 ST_NEW_SAMPLE_SHADING |
 180                 ST_NEW_FS_CONSTANTS;
 181
 182       set_affected_state_flags(states, prog,
 183                                ST_NEW_FS_CONSTANTS,
 184                                ST_NEW_FS_SAMPLER_VIEWS,
 185                                ST_NEW_FS_SAMPLERS,
 186                                ST_NEW_FS_IMAGES,
 187                                ST_NEW_FS_UBOS,
 188                                ST_NEW_FS_SSBOS,
 189                                ST_NEW_FS_ATOMICS);
 190       break;
 191
 192    case MESA_SHADER_COMPUTE:
 193       states = &((struct st_program*)prog)->affected_states;
 194
 195       *states = ST_NEW_CS_STATE;
 196
 197       set_affected_state_flags(states, prog,
 198                                ST_NEW_CS_CONSTANTS,
 199                                ST_NEW_CS_SAMPLER_VIEWS,
 200                                ST_NEW_CS_SAMPLERS,
 201                                ST_NEW_CS_IMAGES,
 202                                ST_NEW_CS_UBOS,
 203                                ST_NEW_CS_SSBOS,
 204                                ST_NEW_CS_ATOMICS);
 205       break;
 206
 207    default:
 208       unreachable("unhandled shader stage");
 209    }
 210 }
 211
 212
 213 /**
 214  * Delete a shader variant.  Note the caller must unlink the variant from
 215  * the linked list.
 216  */
 217 static void
 218 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
 219 {
 220    if (v->driver_shader) {
 221       if (target == GL_VERTEX_PROGRAM_ARB &&
 222           ((struct st_common_variant*)v)->key.is_draw_shader) {
 223          /* Draw shader. */
 224          draw_delete_vertex_shader(st->draw, v->driver_shader);
 225       } else if (st->has_shareable_shaders || v->st == st) {
 226          /* The shader's context matches the calling context, or we
 227           * don't care.
 228           */
 229          switch (target) {
 230          case GL_VERTEX_PROGRAM_ARB:
 231             cso_delete_vertex_shader(st->cso_context, v->driver_shader);
 232             break;
 233          case GL_TESS_CONTROL_PROGRAM_NV:
 234             cso_delete_tessctrl_shader(st->cso_context, v->driver_shader);
 235             break;
 236          case GL_TESS_EVALUATION_PROGRAM_NV:
 237             cso_delete_tesseval_shader(st->cso_context, v->driver_shader);
 238             break;
 239          case GL_GEOMETRY_PROGRAM_NV:
 240             cso_delete_geometry_shader(st->cso_context, v->driver_shader);
 241             break;
 242          case GL_FRAGMENT_PROGRAM_ARB:
 243             cso_delete_fragment_shader(st->cso_context, v->driver_shader);
 244             break;
 245          case GL_COMPUTE_PROGRAM_NV:
 246             cso_delete_compute_shader(st->cso_context, v->driver_shader);
 247             break;
 248          default:
 249             unreachable("bad shader type in delete_basic_variant");
 250          }
 251       } else {
 252          /* We can't delete a shader with a context different from the one
 253           * that created it.  Add it to the creating context's zombie list.
 254           */
 255          enum pipe_shader_type type =
 256             pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
 257
 258          st_save_zombie_shader(v->st, type, v->driver_shader);
 259       }
 260    }
 261
 262    free(v);
 263 }
 264
 265
 266 /**
 267  * Free all basic program variants.
 268  */
 269 void
 270 st_release_variants(struct st_context *st, struct st_program *p)
 271 {
 272    struct st_variant *v;
 273
 274    for (v = p->variants; v; ) {
 275       struct st_variant *next = v->next;
 276       delete_variant(st, v, p->Base.Target);
 277       v = next;
 278    }
 279
 280    p->variants = NULL;
 281
 282    if (p->state.tokens) {
 283       ureg_free_tokens(p->state.tokens);
 284       p->state.tokens = NULL;
 285    }
 286
 287    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 288     * it has resulted in the driver taking ownership of the NIR.  Those
 289     * callers should be NULLing out the nir field in any pipe_shader_state
 290     * that might have this called in order to indicate that.
 291     *
 292     * GLSL IR and ARB programs will have set gl_program->nir to the same
 293     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 294     */
 295 }
 296
 297 void
 298 st_finalize_nir_before_variants(struct nir_shader *nir)
 299 {
 300    NIR_PASS_V(nir, nir_opt_access);
 301
 302    NIR_PASS_V(nir, nir_split_var_copies);
 303    NIR_PASS_V(nir, nir_lower_var_copies);
 304    if (nir->options->lower_all_io_to_temps ||
 305        nir->options->lower_all_io_to_elements ||
 306        nir->info.stage == MESA_SHADER_VERTEX ||
 307        nir->info.stage == MESA_SHADER_GEOMETRY) {
 308       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 309    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 310       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 311    }
 312
 313    st_nir_assign_vs_in_locations(nir);
 314 }
 315
 316 /**
 317  * Translate ARB (asm) program to NIR
 318  */
 319 static nir_shader *
 320 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 321                          gl_shader_stage stage)
 322 {
 323    struct pipe_screen *screen = st->pipe->screen;
 324    const struct gl_shader_compiler_options *options =
 325       &st->ctx->Const.ShaderCompilerOptions[stage];
 326
 327    /* Translate to NIR */
 328    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 329    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 330    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 331
 332    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
 333    NIR_PASS_V(nir, nir_lower_system_values);
 334
 335    /* Optimise NIR */
 336    NIR_PASS_V(nir, nir_opt_constant_folding);
 337    st_nir_opts(nir);
 338    st_finalize_nir_before_variants(nir);
 339
 340    if (st->allow_st_finalize_nir_twice)
 341       st_finalize_nir(st, prog, NULL, nir, true);
 342
 343    nir_validate_shader(nir, "after st/glsl finalize_nir");
 344
 345    return nir;
 346 }
 347
 348 void
 349 st_prepare_vertex_program(struct st_program *stp)
 350 {
 351    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 352
 353    stvp->num_inputs = 0;
 354    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 355    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 356
 357    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 358     * and TGSI generic input indexes, plus input attrib semantic info.
 359     */
 360    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 361       if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 362          stvp->input_to_index[attr] = stvp->num_inputs;
 363          stvp->index_to_input[stvp->num_inputs] = attr;
 364          stvp->num_inputs++;
 365
 366          if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 367             /* add placeholder for second part of a double attribute */
 368             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 369             stvp->num_inputs++;
 370          }
 371       }
 372    }
 373    /* pre-setup potentially unused edgeflag input */
 374    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 375    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 376
 377    /* Compute mapping of vertex program outputs to slots. */
 378    unsigned num_outputs = 0;
 379    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 380       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 381          stvp->result_to_output[attr] = num_outputs++;
 382    }
 383    /* pre-setup potentially unused edgeflag output */
 384    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 385 }
 386
 387 void
 388 st_translate_stream_output_info(struct gl_program *prog)
 389 {
 390    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 391    if (!info)
 392       return;
 393
 394    /* Determine the (default) output register mapping for each output. */
 395    unsigned num_outputs = 0;
 396    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 397    memset(output_mapping, 0, sizeof(output_mapping));
 398
 399    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 400       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 401          output_mapping[attr] = num_outputs++;
 402    }
 403
 404    /* Translate stream output info. */
 405    struct pipe_stream_output_info *so_info =
 406       &((struct st_program*)prog)->state.stream_output;
 407
 408    for (unsigned i = 0; i < info->NumOutputs; i++) {
 409       so_info->output[i].register_index =
 410          output_mapping[info->Outputs[i].OutputRegister];
 411       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 412       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 413       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 414       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 415       so_info->output[i].stream = info->Outputs[i].StreamId;
 416    }
 417
 418    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 419       so_info->stride[i] = info->Buffers[i].Stride;
 420    }
 421    so_info->num_outputs = info->NumOutputs;
 422 }
 423
 424 /**
 425  * Translate a vertex program.
 426  */
 427 bool
 428 st_translate_vertex_program(struct st_context *st,
 429                             struct st_program *stp)
 430 {
 431    struct ureg_program *ureg;
 432    enum pipe_error error;
 433    unsigned num_outputs = 0;
 434    unsigned attr;
 435    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 436    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 437
 438    if (stp->Base.arb.IsPositionInvariant)
 439       _mesa_insert_mvp_code(st->ctx, &stp->Base);
 440
 441    st_prepare_vertex_program(stp);
 442
 443    /* ARB_vp: */
 444    if (!stp->glsl_to_tgsi) {
 445       _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);
 446
 447       /* This determines which states will be updated when the assembly
 448        * shader is bound.
 449        */
 450       stp->affected_states = ST_NEW_VS_STATE |
 451                               ST_NEW_RASTERIZER |
 452                               ST_NEW_VERTEX_ARRAYS;
 453
 454       if (stp->Base.Parameters->NumParameters)
 455          stp->affected_states |= ST_NEW_VS_CONSTANTS;
 456
 457       /* Translate to NIR if preferred. */
 458       if (st->pipe->screen->get_shader_param(st->pipe->screen,
 459                                              PIPE_SHADER_VERTEX,
 460                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 461          assert(!stp->glsl_to_tgsi);
 462
 463          if (stp->Base.nir)
 464             ralloc_free(stp->Base.nir);
 465
 466          stp->state.type = PIPE_SHADER_IR_NIR;
 467          stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
 468                                                   MESA_SHADER_VERTEX);
 469          /* For st_draw_feedback, we need to generate TGSI too if draw doesn't
 470           * use LLVM.
 471           */
 472          if (draw_has_llvm())
 473             return true;
 474       }
 475    }
 476
 477    /* Get semantic names and indices. */
 478    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 479       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 480          unsigned slot = num_outputs++;
 481          unsigned semantic_name, semantic_index;
 482          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 483                                       &semantic_name, &semantic_index);
 484          output_semantic_name[slot] = semantic_name;
 485          output_semantic_index[slot] = semantic_index;
 486       }
 487    }
 488    /* pre-setup potentially unused edgeflag output */
 489    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 490    output_semantic_index[num_outputs] = 0;
 491
 492    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 493    if (ureg == NULL)
 494       return false;
 495
 496    if (stp->Base.info.clip_distance_array_size)
 497       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 498                     stp->Base.info.clip_distance_array_size);
 499    if (stp->Base.info.cull_distance_array_size)
 500       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 501                     stp->Base.info.cull_distance_array_size);
 502
 503    if (ST_DEBUG & DEBUG_MESA) {
 504       _mesa_print_program(&stp->Base);
 505       _mesa_print_program_parameters(st->ctx, &stp->Base);
 506       debug_printf("\n");
 507    }
 508
 509    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 510
 511    if (stp->glsl_to_tgsi) {
 512       error = st_translate_program(st->ctx,
 513                                    PIPE_SHADER_VERTEX,
 514                                    ureg,
 515                                    stp->glsl_to_tgsi,
 516                                    &stp->Base,
 517                                    /* inputs */
 518                                    stvp->num_inputs,
 519                                    stvp->input_to_index,
 520                                    NULL, /* inputSlotToAttr */
 521                                    NULL, /* input semantic name */
 522                                    NULL, /* input semantic index */
 523                                    NULL, /* interp mode */
 524                                    /* outputs */
 525                                    num_outputs,
 526                                    stvp->result_to_output,
 527                                    output_semantic_name,
 528                                    output_semantic_index);
 529
 530       st_translate_stream_output_info(&stp->Base);
 531
 532       free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
 533    } else
 534       error = st_translate_mesa_program(st->ctx,
 535                                         PIPE_SHADER_VERTEX,
 536                                         ureg,
 537                                         &stp->Base,
 538                                         /* inputs */
 539                                         stvp->num_inputs,
 540                                         stvp->input_to_index,
 541                                         NULL, /* input semantic name */
 542                                         NULL, /* input semantic index */
 543                                         NULL,
 544                                         /* outputs */
 545                                         num_outputs,
 546                                         stvp->result_to_output,
 547                                         output_semantic_name,
 548                                         output_semantic_index);
 549
 550    if (error) {
 551       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 552       _mesa_print_program(&stp->Base);
 553       debug_assert(0);
 554       return false;
 555    }
 556
 557    stp->state.tokens = ureg_get_tokens(ureg, NULL);
 558    ureg_destroy(ureg);
 559
 560    if (stp->glsl_to_tgsi) {
 561       stp->glsl_to_tgsi = NULL;
 562       st_store_ir_in_disk_cache(st, &stp->Base, false);
 563    }
 564
 565    return stp->state.tokens != NULL;
 566 }
 567
 568 static const gl_state_index16 depth_range_state[STATE_LENGTH] =
 569    { STATE_DEPTH_RANGE };
 570
 571 static struct st_common_variant *
 572 st_create_vp_variant(struct st_context *st,
 573                      struct st_program *stvp,
 574                      const struct st_common_variant_key *key)
 575 {
 576    struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
 577    struct pipe_context *pipe = st->pipe;
 578    struct pipe_screen *screen = pipe->screen;
 579    struct pipe_shader_state state = {0};
 580
 581    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 582       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 583    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 584
 585    vpv->key = *key;
 586
 587    state.stream_output = stvp->state.stream_output;
 588
 589    if (stvp->state.type == PIPE_SHADER_IR_NIR &&
 590        (!key->is_draw_shader || draw_has_llvm())) {
 591       bool finalize = false;
 592
 593       state.type = PIPE_SHADER_IR_NIR;
 594       state.ir.nir = nir_shader_clone(NULL, stvp->Base.nir);
 595       if (key->clamp_color) {
 596          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 597          finalize = true;
 598       }
 599       if (key->passthrough_edgeflags) {
 600          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 601          finalize = true;
 602       }
 603
 604       if (key->lower_point_size) {
 605          _mesa_add_state_reference(params, point_size_state);
 606          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 607                     point_size_state);
 608          finalize = true;
 609       }
 610
 611       if (key->lower_ucp) {
 612          bool can_compact = screen->get_param(screen,
 613                                               PIPE_CAP_NIR_COMPACT_ARRAYS);
 614
 615          bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 616          gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 617          for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 618             if (use_eye) {
 619                clipplane_state[i][0] = STATE_CLIPPLANE;
 620                clipplane_state[i][1] = i;
 621             } else {
 622                clipplane_state[i][0] = STATE_INTERNAL;
 623                clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 624                clipplane_state[i][2] = i;
 625             }
 626             _mesa_add_state_reference(params, clipplane_state[i]);
 627          }
 628
 629          NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
 630                     true, can_compact, clipplane_state);
 631          NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
 632                     nir_shader_get_entrypoint(state.ir.nir), true, false);
 633          NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
 634          finalize = true;
 635       }
 636
 637       if (finalize || !st->allow_st_finalize_nir_twice) {
 638          st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
 639                          true);
 640
 641          /* Some of the lowering above may have introduced new varyings */
 642          nir_shader_gather_info(state.ir.nir,
 643                                 nir_shader_get_entrypoint(state.ir.nir));
 644       }
 645
 646       if (ST_DEBUG & DEBUG_PRINT_IR)
 647          nir_print_shader(state.ir.nir, stderr);
 648
 649       if (key->is_draw_shader)
 650          vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 651       else
 652          vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 653
 654       return vpv;
 655    }
 656
 657    state.type = PIPE_SHADER_IR_TGSI;
 658    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 659
 660    /* Emulate features. */
 661    if (key->clamp_color || key->passthrough_edgeflags) {
 662       const struct tgsi_token *tokens;
 663       unsigned flags =
 664          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 665          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 666
 667       tokens = tgsi_emulate(state.tokens, flags);
 668
 669       if (tokens) {
 670          tgsi_free_tokens(state.tokens);
 671          state.tokens = tokens;
 672       } else {
 673          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 674       }
 675    }
 676
 677    if (key->lower_depth_clamp) {
 678       unsigned depth_range_const =
 679             _mesa_add_state_reference(params, depth_range_state);
 680
 681       const struct tgsi_token *tokens;
 682       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 683                                          key->clip_negative_one_to_one);
 684       if (tokens != state.tokens)
 685          tgsi_free_tokens(state.tokens);
 686       state.tokens = tokens;
 687    }
 688
 689    if (ST_DEBUG & DEBUG_PRINT_IR)
 690       tgsi_dump(state.tokens, 0);
 691
 692    if (key->is_draw_shader)
 693       vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 694    else
 695       vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 696
 697    return vpv;
 698 }
 699
 700
 701 /**
 702  * Find/create a vertex program variant.
 703  */
 704 struct st_common_variant *
 705 st_get_vp_variant(struct st_context *st,
 706                   struct st_program *stp,
 707                   const struct st_common_variant_key *key)
 708 {
 709    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 710    struct st_common_variant *vpv;
 711
 712    /* Search for existing variant */
 713    for (vpv = st_common_variant(stp->variants); vpv;
 714         vpv = st_common_variant(vpv->base.next)) {
 715       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 716          break;
 717       }
 718    }
 719
 720    if (!vpv) {
 721       /* create now */
 722       vpv = st_create_vp_variant(st, stp, key);
 723       if (vpv) {
 724          vpv->base.st = key->st;
 725
 726          unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
 727          for (unsigned index = 0; index < num_inputs; ++index) {
 728             unsigned attr = stvp->index_to_input[index];
 729             if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 730                continue;
 731             vpv->vert_attrib_mask |= 1u << attr;
 732          }
 733
 734          /* insert into list */
 735          vpv->base.next = stp->variants;
 736          stp->variants = &vpv->base;
 737       }
 738    }
 739
 740    return vpv;
 741 }
 742
 743
 744 /**
 745  * Translate a Mesa fragment shader into a TGSI shader.
 746  */
 747 bool
 748 st_translate_fragment_program(struct st_context *st,
 749                               struct st_program *stfp)
 750 {
 751    /* Non-GLSL programs: */
 752    if (!stfp->glsl_to_tgsi) {
 753       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 754       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 755          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 756
 757       /* This determines which states will be updated when the assembly
 758        * shader is bound.
 759        *
 760        * fragment.position and glDrawPixels always use constants.
 761        */
 762       stfp->affected_states = ST_NEW_FS_STATE |
 763                               ST_NEW_SAMPLE_SHADING |
 764                               ST_NEW_FS_CONSTANTS;
 765
 766       if (stfp->ati_fs) {
 767          /* Just set them for ATI_fs unconditionally. */
 768          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 769                                   ST_NEW_FS_SAMPLERS;
 770       } else {
 771          /* ARB_fp */
 772          if (stfp->Base.SamplersUsed)
 773             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 774                                      ST_NEW_FS_SAMPLERS;
 775       }
 776
 777       /* Translate to NIR. */
 778       if (!stfp->ati_fs &&
 779           st->pipe->screen->get_shader_param(st->pipe->screen,
 780                                              PIPE_SHADER_FRAGMENT,
 781                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 782          nir_shader *nir =
 783             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 784
 785          if (stfp->Base.nir)
 786             ralloc_free(stfp->Base.nir);
 787          stfp->state.type = PIPE_SHADER_IR_NIR;
 788          stfp->Base.nir = nir;
 789          return true;
 790       }
 791    }
 792
 793    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 794    ubyte inputMapping[VARYING_SLOT_MAX];
 795    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 796    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 797    GLuint attr;
 798    GLbitfield64 inputsRead;
 799    struct ureg_program *ureg;
 800
 801    GLboolean write_all = GL_FALSE;
 802
 803    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 804    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 805    uint fs_num_inputs = 0;
 806
 807    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 808    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 809    uint fs_num_outputs = 0;
 810
 811    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 812
 813    /*
 814     * Convert Mesa program inputs to TGSI input register semantics.
 815     */
 816    inputsRead = stfp->Base.info.inputs_read;
 817    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 818       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 819          const GLuint slot = fs_num_inputs++;
 820
 821          inputMapping[attr] = slot;
 822          inputSlotToAttr[slot] = attr;
 823
 824          switch (attr) {
 825          case VARYING_SLOT_POS:
 826             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 827             input_semantic_index[slot] = 0;
 828             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 829             break;
 830          case VARYING_SLOT_COL0:
 831             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 832             input_semantic_index[slot] = 0;
 833             interpMode[slot] = stfp->glsl_to_tgsi ?
 834                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 835             break;
 836          case VARYING_SLOT_COL1:
 837             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 838             input_semantic_index[slot] = 1;
 839             interpMode[slot] = stfp->glsl_to_tgsi ?
 840                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 841             break;
 842          case VARYING_SLOT_FOGC:
 843             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 844             input_semantic_index[slot] = 0;
 845             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 846             break;
 847          case VARYING_SLOT_FACE:
 848             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 849             input_semantic_index[slot] = 0;
 850             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 851             break;
 852          case VARYING_SLOT_PRIMITIVE_ID:
 853             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 854             input_semantic_index[slot] = 0;
 855             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 856             break;
 857          case VARYING_SLOT_LAYER:
 858             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 859             input_semantic_index[slot] = 0;
 860             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 861             break;
 862          case VARYING_SLOT_VIEWPORT:
 863             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 864             input_semantic_index[slot] = 0;
 865             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 866             break;
 867          case VARYING_SLOT_CLIP_DIST0:
 868             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 869             input_semantic_index[slot] = 0;
 870             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 871             break;
 872          case VARYING_SLOT_CLIP_DIST1:
 873             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 874             input_semantic_index[slot] = 1;
 875             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 876             break;
 877          case VARYING_SLOT_CULL_DIST0:
 878          case VARYING_SLOT_CULL_DIST1:
 879             /* these should have been lowered by GLSL */
 880             assert(0);
 881             break;
 882             /* In most cases, there is nothing special about these
 883              * inputs, so adopt a convention to use the generic
 884              * semantic name and the mesa VARYING_SLOT_ number as the
 885              * index.
 886              *
 887              * All that is required is that the vertex shader labels
 888              * its own outputs similarly, and that the vertex shader
 889              * generates at least every output required by the
 890              * fragment shader plus fixed-function hardware (such as
 891              * BFC).
 892              *
 893              * However, some drivers may need us to identify the PNTC and TEXi
 894              * varyings if, for example, their capability to replace them with
 895              * sprite coordinates is limited.
 896              */
 897          case VARYING_SLOT_PNTC:
 898             if (st->needs_texcoord_semantic) {
 899                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
 900                input_semantic_index[slot] = 0;
 901                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 902                break;
 903             }
 904             /* fall through */
 905          case VARYING_SLOT_TEX0:
 906          case VARYING_SLOT_TEX1:
 907          case VARYING_SLOT_TEX2:
 908          case VARYING_SLOT_TEX3:
 909          case VARYING_SLOT_TEX4:
 910          case VARYING_SLOT_TEX5:
 911          case VARYING_SLOT_TEX6:
 912          case VARYING_SLOT_TEX7:
 913             if (st->needs_texcoord_semantic) {
 914                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
 915                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
 916                interpMode[slot] = stfp->glsl_to_tgsi ?
 917                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 918                break;
 919             }
 920             /* fall through */
 921          case VARYING_SLOT_VAR0:
 922          default:
 923             /* Semantic indices should be zero-based because drivers may choose
 924              * to assign a fixed slot determined by that index.
 925              * This is useful because ARB_separate_shader_objects uses location
 926              * qualifiers for linkage, and if the semantic index corresponds to
 927              * these locations, linkage passes in the driver become unecessary.
 928              *
 929              * If needs_texcoord_semantic is true, no semantic indices will be
 930              * consumed for the TEXi varyings, and we can base the locations of
 931              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
 932              */
 933             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
 934                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
 935             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
 936             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
 937             if (attr == VARYING_SLOT_PNTC)
 938                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 939             else {
 940                interpMode[slot] = stfp->glsl_to_tgsi ?
 941                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
 942             }
 943             break;
 944          }
 945       }
 946       else {
 947          inputMapping[attr] = -1;
 948       }
 949    }
 950
 951    /*
 952     * Semantics and mapping for outputs
 953     */
 954    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
 955
 956    /* if z is written, emit that first */
 957    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
 958       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
 959       fs_output_semantic_index[fs_num_outputs] = 0;
 960       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
 961       fs_num_outputs++;
 962       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
 963    }
 964
 965    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
 966       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
 967       fs_output_semantic_index[fs_num_outputs] = 0;
 968       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
 969       fs_num_outputs++;
 970       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
 971    }
 972
 973    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
 974       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
 975       fs_output_semantic_index[fs_num_outputs] = 0;
 976       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
 977       fs_num_outputs++;
 978       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
 979    }
 980
 981    /* handle remaining outputs (color) */
 982    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
 983       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
 984          stfp->Base.SecondaryOutputsWritten;
 985       const unsigned loc = attr % FRAG_RESULT_MAX;
 986
 987       if (written & BITFIELD64_BIT(loc)) {
 988          switch (loc) {
 989          case FRAG_RESULT_DEPTH:
 990          case FRAG_RESULT_STENCIL:
 991          case FRAG_RESULT_SAMPLE_MASK:
 992             /* handled above */
 993             assert(0);
 994             break;
 995          case FRAG_RESULT_COLOR:
 996             write_all = GL_TRUE; /* fallthrough */
 997          default: {
 998             int index;
 999             assert(loc == FRAG_RESULT_COLOR ||
1000                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1001
1002             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1003
1004             if (attr >= FRAG_RESULT_MAX) {
1005                /* Secondary color for dual source blending. */
1006                assert(index == 0);
1007                index++;
1008             }
1009
1010             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1011             fs_output_semantic_index[fs_num_outputs] = index;
1012             outputMapping[attr] = fs_num_outputs;
1013             break;
1014          }
1015          }
1016
1017          fs_num_outputs++;
1018       }
1019    }
1020
1021    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1022    if (ureg == NULL)
1023       return false;
1024
1025    if (ST_DEBUG & DEBUG_MESA) {
1026       _mesa_print_program(&stfp->Base);
1027       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1028       debug_printf("\n");
1029    }
1030    if (write_all == GL_TRUE)
1031       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1032
1033    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1034       switch (stfp->Base.info.fs.depth_layout) {
1035       case FRAG_DEPTH_LAYOUT_ANY:
1036          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1037                        TGSI_FS_DEPTH_LAYOUT_ANY);
1038          break;
1039       case FRAG_DEPTH_LAYOUT_GREATER:
1040          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1041                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1042          break;
1043       case FRAG_DEPTH_LAYOUT_LESS:
1044          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1045                        TGSI_FS_DEPTH_LAYOUT_LESS);
1046          break;
1047       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1048          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1049                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1050          break;
1051       default:
1052          assert(0);
1053       }
1054    }
1055
1056    if (stfp->glsl_to_tgsi) {
1057       st_translate_program(st->ctx,
1058                            PIPE_SHADER_FRAGMENT,
1059                            ureg,
1060                            stfp->glsl_to_tgsi,
1061                            &stfp->Base,
1062                            /* inputs */
1063                            fs_num_inputs,
1064                            inputMapping,
1065                            inputSlotToAttr,
1066                            input_semantic_name,
1067                            input_semantic_index,
1068                            interpMode,
1069                            /* outputs */
1070                            fs_num_outputs,
1071                            outputMapping,
1072                            fs_output_semantic_name,
1073                            fs_output_semantic_index);
1074
1075       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1076    } else if (stfp->ati_fs)
1077       st_translate_atifs_program(ureg,
1078                                  stfp->ati_fs,
1079                                  &stfp->Base,
1080                                  /* inputs */
1081                                  fs_num_inputs,
1082                                  inputMapping,
1083                                  input_semantic_name,
1084                                  input_semantic_index,
1085                                  interpMode,
1086                                  /* outputs */
1087                                  fs_num_outputs,
1088                                  outputMapping,
1089                                  fs_output_semantic_name,
1090                                  fs_output_semantic_index);
1091    else
1092       st_translate_mesa_program(st->ctx,
1093                                 PIPE_SHADER_FRAGMENT,
1094                                 ureg,
1095                                 &stfp->Base,
1096                                 /* inputs */
1097                                 fs_num_inputs,
1098                                 inputMapping,
1099                                 input_semantic_name,
1100                                 input_semantic_index,
1101                                 interpMode,
1102                                 /* outputs */
1103                                 fs_num_outputs,
1104                                 outputMapping,
1105                                 fs_output_semantic_name,
1106                                 fs_output_semantic_index);
1107
1108    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1109    ureg_destroy(ureg);
1110
1111    if (stfp->glsl_to_tgsi) {
1112       stfp->glsl_to_tgsi = NULL;
1113       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1114    }
1115
1116    return stfp->state.tokens != NULL;
1117 }
1118
1119 static struct st_fp_variant *
1120 st_create_fp_variant(struct st_context *st,
1121                      struct st_program *stfp,
1122                      const struct st_fp_variant_key *key)
1123 {
1124    struct pipe_context *pipe = st->pipe;
1125    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1126    struct pipe_shader_state state = {0};
1127    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1128    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1129       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1130    static const gl_state_index16 scale_state[STATE_LENGTH] =
1131       { STATE_INTERNAL, STATE_PT_SCALE };
1132    static const gl_state_index16 bias_state[STATE_LENGTH] =
1133       { STATE_INTERNAL, STATE_PT_BIAS };
1134    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1135       { STATE_INTERNAL, STATE_ALPHA_REF };
1136
1137    if (!variant)
1138       return NULL;
1139
1140    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1141       bool finalize = false;
1142
1143       state.type = PIPE_SHADER_IR_NIR;
1144       state.ir.nir = nir_shader_clone(NULL, stfp->Base.nir);
1145
1146       if (key->clamp_color) {
1147          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1148          finalize = true;
1149       }
1150
1151       if (key->lower_flatshade) {
1152          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1153          finalize = true;
1154       }
1155
1156       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1157          _mesa_add_state_reference(params, alpha_ref_state);
1158          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1159                     false, alpha_ref_state);
1160          finalize = true;
1161       }
1162
1163       if (key->lower_two_sided_color) {
1164          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
1165          finalize = true;
1166       }
1167
1168       if (key->persample_shading) {
1169           nir_shader *shader = state.ir.nir;
1170           nir_foreach_variable(var, &shader->inputs)
1171              var->data.sample = true;
1172           finalize = true;
1173       }
1174
1175       assert(!(key->bitmap && key->drawpixels));
1176
1177       /* glBitmap */
1178       if (key->bitmap) {
1179          nir_lower_bitmap_options options = {0};
1180
1181          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1182          options.sampler = variant->bitmap_sampler;
1183          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1184
1185          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1186          finalize = true;
1187       }
1188
1189       /* glDrawPixels (color only) */
1190       if (key->drawpixels) {
1191          nir_lower_drawpixels_options options = {{0}};
1192          unsigned samplers_used = stfp->Base.SamplersUsed;
1193
1194          /* Find the first unused slot. */
1195          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1196          options.drawpix_sampler = variant->drawpix_sampler;
1197          samplers_used |= (1 << variant->drawpix_sampler);
1198
1199          options.pixel_maps = key->pixelMaps;
1200          if (key->pixelMaps) {
1201             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1202             options.pixelmap_sampler = variant->pixelmap_sampler;
1203          }
1204
1205          options.scale_and_bias = key->scaleAndBias;
1206          if (key->scaleAndBias) {
1207             _mesa_add_state_reference(params, scale_state);
1208             memcpy(options.scale_state_tokens, scale_state,
1209                    sizeof(options.scale_state_tokens));
1210             _mesa_add_state_reference(params, bias_state);
1211             memcpy(options.bias_state_tokens, bias_state,
1212                    sizeof(options.bias_state_tokens));
1213          }
1214
1215          _mesa_add_state_reference(params, texcoord_state);
1216          memcpy(options.texcoord_state_tokens, texcoord_state,
1217                 sizeof(options.texcoord_state_tokens));
1218
1219          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1220          finalize = true;
1221       }
1222
1223       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1224                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1225                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1226          nir_lower_tex_options options = {0};
1227          options.lower_y_uv_external = key->external.lower_nv12;
1228          options.lower_y_u_v_external = key->external.lower_iyuv;
1229          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1230          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1231          options.lower_ayuv_external = key->external.lower_ayuv;
1232          options.lower_xyuv_external = key->external.lower_xyuv;
1233          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1234          finalize = true;
1235       }
1236
1237       if (finalize || !st->allow_st_finalize_nir_twice) {
1238          st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
1239                          false);
1240       }
1241
1242       /* This pass needs to happen *after* nir_lower_sampler */
1243       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1244                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1245          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1246                     ~stfp->Base.SamplersUsed,
1247                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1248                        key->external.lower_yx_xuxv,
1249                     key->external.lower_iyuv);
1250          finalize = true;
1251       }
1252
1253       if (finalize || !st->allow_st_finalize_nir_twice) {
1254          /* Some of the lowering above may have introduced new varyings */
1255          nir_shader_gather_info(state.ir.nir,
1256                                 nir_shader_get_entrypoint(state.ir.nir));
1257
1258          struct pipe_screen *screen = pipe->screen;
1259          if (screen->finalize_nir)
1260             screen->finalize_nir(screen, state.ir.nir, false);
1261       }
1262
1263       if (ST_DEBUG & DEBUG_PRINT_IR)
1264          nir_print_shader(state.ir.nir, stderr);
1265
1266       variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1267       variant->key = *key;
1268
1269       return variant;
1270    }
1271
1272    state.tokens = stfp->state.tokens;
1273
1274    assert(!(key->bitmap && key->drawpixels));
1275
1276    /* Fix texture targets and add fog for ATI_fs */
1277    if (stfp->ati_fs) {
1278       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1279
1280       if (tokens)
1281          state.tokens = tokens;
1282       else
1283          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1284    }
1285
1286    /* Emulate features. */
1287    if (key->clamp_color || key->persample_shading) {
1288       const struct tgsi_token *tokens;
1289       unsigned flags =
1290          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1291          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1292
1293       tokens = tgsi_emulate(state.tokens, flags);
1294
1295       if (tokens) {
1296          if (state.tokens != stfp->state.tokens)
1297             tgsi_free_tokens(state.tokens);
1298          state.tokens = tokens;
1299       } else
1300          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1301    }
1302
1303    /* glBitmap */
1304    if (key->bitmap) {
1305       const struct tgsi_token *tokens;
1306
1307       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1308
1309       tokens = st_get_bitmap_shader(state.tokens,
1310                                     st->internal_target,
1311                                     variant->bitmap_sampler,
1312                                     st->needs_texcoord_semantic,
1313                                     st->bitmap.tex_format ==
1314                                     PIPE_FORMAT_R8_UNORM);
1315
1316       if (tokens) {
1317          if (state.tokens != stfp->state.tokens)
1318             tgsi_free_tokens(state.tokens);
1319          state.tokens = tokens;
1320       } else
1321          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1322    }
1323
1324    /* glDrawPixels (color only) */
1325    if (key->drawpixels) {
1326       const struct tgsi_token *tokens;
1327       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1328
1329       /* Find the first unused slot. */
1330       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1331
1332       if (key->pixelMaps) {
1333          unsigned samplers_used = stfp->Base.SamplersUsed |
1334                                   (1 << variant->drawpix_sampler);
1335
1336          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1337       }
1338
1339       if (key->scaleAndBias) {
1340          scale_const = _mesa_add_state_reference(params, scale_state);
1341          bias_const = _mesa_add_state_reference(params, bias_state);
1342       }
1343
1344       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1345
1346       tokens = st_get_drawpix_shader(state.tokens,
1347                                      st->needs_texcoord_semantic,
1348                                      key->scaleAndBias, scale_const,
1349                                      bias_const, key->pixelMaps,
1350                                      variant->drawpix_sampler,
1351                                      variant->pixelmap_sampler,
1352                                      texcoord_const, st->internal_target);
1353
1354       if (tokens) {
1355          if (state.tokens != stfp->state.tokens)
1356             tgsi_free_tokens(state.tokens);
1357          state.tokens = tokens;
1358       } else
1359          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1360    }
1361
1362    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1363                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1364       const struct tgsi_token *tokens;
1365
1366       /* samplers inserted would conflict, but this should be unpossible: */
1367       assert(!(key->bitmap || key->drawpixels));
1368
1369       tokens = st_tgsi_lower_yuv(state.tokens,
1370                                  ~stfp->Base.SamplersUsed,
1371                                  key->external.lower_nv12 ||
1372                                     key->external.lower_xy_uxvx ||
1373                                     key->external.lower_yx_xuxv,
1374                                  key->external.lower_iyuv);
1375       if (tokens) {
1376          if (state.tokens != stfp->state.tokens)
1377             tgsi_free_tokens(state.tokens);
1378          state.tokens = tokens;
1379       } else {
1380          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1381       }
1382    }
1383
1384    if (key->lower_depth_clamp) {
1385       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1386
1387       const struct tgsi_token *tokens;
1388       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1389       if (state.tokens != stfp->state.tokens)
1390          tgsi_free_tokens(state.tokens);
1391       state.tokens = tokens;
1392    }
1393
1394    if (ST_DEBUG & DEBUG_PRINT_IR)
1395       tgsi_dump(state.tokens, 0);
1396
1397    /* fill in variant */
1398    variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1399    variant->key = *key;
1400
1401    if (state.tokens != stfp->state.tokens)
1402       tgsi_free_tokens(state.tokens);
1403    return variant;
1404 }
1405
1406 /**
1407  * Translate fragment program if needed.
1408  */
1409 struct st_fp_variant *
1410 st_get_fp_variant(struct st_context *st,
1411                   struct st_program *stfp,
1412                   const struct st_fp_variant_key *key)
1413 {
1414    struct st_fp_variant *fpv;
1415
1416    /* Search for existing variant */
1417    for (fpv = st_fp_variant(stfp->variants); fpv;
1418         fpv = st_fp_variant(fpv->base.next)) {
1419       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1420          break;
1421       }
1422    }
1423
1424    if (!fpv) {
1425       /* create new */
1426       fpv = st_create_fp_variant(st, stfp, key);
1427       if (fpv) {
1428          fpv->base.st = key->st;
1429
1430          if (key->bitmap || key->drawpixels) {
1431             /* Regular variants should always come before the
1432              * bitmap & drawpixels variants, (unless there
1433              * are no regular variants) so that
1434              * st_update_fp can take a fast path when
1435              * shader_has_one_variant is set.
1436              */
1437             if (!stfp->variants) {
1438                stfp->variants = &fpv->base;
1439             } else {
1440                /* insert into list after the first one */
1441                fpv->base.next = stfp->variants->next;
1442                stfp->variants->next = &fpv->base;
1443             }
1444          } else {
1445             /* insert into list */
1446             fpv->base.next = stfp->variants;
1447             stfp->variants = &fpv->base;
1448          }
1449       }
1450    }
1451
1452    return fpv;
1453 }
1454
1455 /**
1456  * Translate a program. This is common code for geometry and tessellation
1457  * shaders.
1458  */
1459 bool
1460 st_translate_common_program(struct st_context *st,
1461                             struct st_program *stp)
1462 {
1463    struct gl_program *prog = &stp->Base;
1464    enum pipe_shader_type stage =
1465       pipe_shader_type_from_mesa(stp->Base.info.stage);
1466    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1467
1468    if (ureg == NULL)
1469       return false;
1470
1471    switch (stage) {
1472    case PIPE_SHADER_TESS_CTRL:
1473       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1474                     stp->Base.info.tess.tcs_vertices_out);
1475       break;
1476
1477    case PIPE_SHADER_TESS_EVAL:
1478       if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
1479          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1480       else
1481          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1482                        stp->Base.info.tess.primitive_mode);
1483
1484       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1485       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1486                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1487       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1488                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1489
1490       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1491                     (stp->Base.info.tess.spacing + 1) % 3);
1492
1493       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1494                     !stp->Base.info.tess.ccw);
1495       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1496                     stp->Base.info.tess.point_mode);
1497       break;
1498
1499    case PIPE_SHADER_GEOMETRY:
1500       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1501                     stp->Base.info.gs.input_primitive);
1502       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1503                     stp->Base.info.gs.output_primitive);
1504       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1505                     stp->Base.info.gs.vertices_out);
1506       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1507                     stp->Base.info.gs.invocations);
1508       break;
1509
1510    default:
1511       break;
1512    }
1513
1514    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1515    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1516    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1517    GLuint attr;
1518
1519    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1520    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1521    uint num_inputs = 0;
1522
1523    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1524    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1525    uint num_outputs = 0;
1526
1527    GLint i;
1528
1529    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1530    memset(inputMapping, 0, sizeof(inputMapping));
1531    memset(outputMapping, 0, sizeof(outputMapping));
1532    memset(&stp->state, 0, sizeof(stp->state));
1533
1534    if (prog->info.clip_distance_array_size)
1535       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1536                     prog->info.clip_distance_array_size);
1537    if (prog->info.cull_distance_array_size)
1538       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1539                     prog->info.cull_distance_array_size);
1540
1541    /*
1542     * Convert Mesa program inputs to TGSI input register semantics.
1543     */
1544    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1545       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1546          continue;
1547
1548       unsigned slot = num_inputs++;
1549
1550       inputMapping[attr] = slot;
1551       inputSlotToAttr[slot] = attr;
1552
1553       unsigned semantic_name, semantic_index;
1554       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1555                                    &semantic_name, &semantic_index);
1556       input_semantic_name[slot] = semantic_name;
1557       input_semantic_index[slot] = semantic_index;
1558    }
1559
1560    /* Also add patch inputs. */
1561    for (attr = 0; attr < 32; attr++) {
1562       if (prog->info.patch_inputs_read & (1u << attr)) {
1563          GLuint slot = num_inputs++;
1564          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1565
1566          inputMapping[patch_attr] = slot;
1567          inputSlotToAttr[slot] = patch_attr;
1568          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1569          input_semantic_index[slot] = attr;
1570       }
1571    }
1572
1573    /* initialize output semantics to defaults */
1574    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1575       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1576       output_semantic_index[i] = 0;
1577    }
1578
1579    /*
1580     * Determine number of outputs, the (default) output register
1581     * mapping and the semantic information for each output.
1582     */
1583    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1584       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1585          GLuint slot = num_outputs++;
1586
1587          outputMapping[attr] = slot;
1588
1589          unsigned semantic_name, semantic_index;
1590          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1591                                       &semantic_name, &semantic_index);
1592          output_semantic_name[slot] = semantic_name;
1593          output_semantic_index[slot] = semantic_index;
1594       }
1595    }
1596
1597    /* Also add patch outputs. */
1598    for (attr = 0; attr < 32; attr++) {
1599       if (prog->info.patch_outputs_written & (1u << attr)) {
1600          GLuint slot = num_outputs++;
1601          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1602
1603          outputMapping[patch_attr] = slot;
1604          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1605          output_semantic_index[slot] = attr;
1606       }
1607    }
1608
1609    st_translate_program(st->ctx,
1610                         stage,
1611                         ureg,
1612                         stp->glsl_to_tgsi,
1613                         prog,
1614                         /* inputs */
1615                         num_inputs,
1616                         inputMapping,
1617                         inputSlotToAttr,
1618                         input_semantic_name,
1619                         input_semantic_index,
1620                         NULL,
1621                         /* outputs */
1622                         num_outputs,
1623                         outputMapping,
1624                         output_semantic_name,
1625                         output_semantic_index);
1626
1627    stp->state.tokens = ureg_get_tokens(ureg, NULL);
1628
1629    ureg_destroy(ureg);
1630
1631    st_translate_stream_output_info(prog);
1632
1633    st_store_ir_in_disk_cache(st, prog, false);
1634
1635    if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1636       _mesa_print_program(prog);
1637
1638    free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1639    stp->glsl_to_tgsi = NULL;
1640    return true;
1641 }
1642
1643
1644 /**
1645  * Get/create a basic program variant.
1646  */
1647 struct st_variant *
1648 st_get_common_variant(struct st_context *st,
1649                       struct st_program *prog,
1650                       const struct st_common_variant_key *key)
1651 {
1652    struct pipe_context *pipe = st->pipe;
1653    struct st_variant *v;
1654    struct pipe_shader_state state = {0};
1655
1656    /* Search for existing variant */
1657    for (v = prog->variants; v; v = v->next) {
1658       if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
1659          break;
1660    }
1661
1662    if (!v) {
1663       /* create new */
1664       v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
1665       if (v) {
1666          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1667             bool finalize = false;
1668
1669             state.type = PIPE_SHADER_IR_NIR;
1670             state.ir.nir = nir_shader_clone(NULL, prog->Base.nir);
1671
1672             if (key->clamp_color) {
1673                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1674                finalize = true;
1675             }
1676
1677             state.stream_output = prog->state.stream_output;
1678
1679             if (finalize || !st->allow_st_finalize_nir_twice) {
1680                st_finalize_nir(st, &prog->Base, prog->shader_program,
1681                                state.ir.nir, true);
1682             }
1683
1684             if (ST_DEBUG & DEBUG_PRINT_IR)
1685                nir_print_shader(state.ir.nir, stderr);
1686          } else {
1687             if (key->lower_depth_clamp) {
1688                struct gl_program_parameter_list *params = prog->Base.Parameters;
1689
1690                unsigned depth_range_const =
1691                      _mesa_add_state_reference(params, depth_range_state);
1692
1693                const struct tgsi_token *tokens;
1694                tokens =
1695                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1696                                                depth_range_const,
1697                                                key->clip_negative_one_to_one);
1698
1699                if (tokens != prog->state.tokens)
1700                   tgsi_free_tokens(prog->state.tokens);
1701
1702                prog->state.tokens = tokens;
1703             }
1704             state = prog->state;
1705
1706             if (ST_DEBUG & DEBUG_PRINT_IR)
1707                tgsi_dump(state.tokens, 0);
1708          }
1709          /* fill in new variant */
1710          switch (prog->Base.info.stage) {
1711          case MESA_SHADER_TESS_CTRL:
1712             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1713             break;
1714          case MESA_SHADER_TESS_EVAL:
1715             v->driver_shader = pipe->create_tes_state(pipe, &state);
1716             break;
1717          case MESA_SHADER_GEOMETRY:
1718             v->driver_shader = pipe->create_gs_state(pipe, &state);
1719             break;
1720          case MESA_SHADER_COMPUTE: {
1721             struct pipe_compute_state cs = {0};
1722             cs.ir_type = state.type;
1723             cs.req_local_mem = prog->Base.info.cs.shared_size;
1724
1725             if (state.type == PIPE_SHADER_IR_NIR)
1726                cs.prog = state.ir.nir;
1727             else
1728                cs.prog = state.tokens;
1729
1730             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1731             break;
1732          }
1733          default:
1734             assert(!"unhandled shader type");
1735             free(v);
1736             return NULL;
1737          }
1738
1739          st_common_variant(v)->key = *key;
1740          v->st = key->st;
1741
1742          /* insert into list */
1743          v->next = prog->variants;
1744          prog->variants = v;
1745       }
1746    }
1747
1748    return v;
1749 }
1750
1751
1752 /**
1753  * Vert/Geom/Frag programs have per-context variants.  Free all the
1754  * variants attached to the given program which match the given context.
1755  */
1756 static void
1757 destroy_program_variants(struct st_context *st, struct gl_program *target)
1758 {
1759    if (!target || target == &_mesa_DummyProgram)
1760       return;
1761
1762    struct st_program *p = st_program(target);
1763    struct st_variant *v, **prevPtr = &p->variants;
1764
1765    for (v = p->variants; v; ) {
1766       struct st_variant *next = v->next;
1767       if (v->st == st) {
1768          /* unlink from list */
1769          *prevPtr = next;
1770          /* destroy this variant */
1771          delete_variant(st, v, target->Target);
1772       }
1773       else {
1774          prevPtr = &v->next;
1775       }
1776       v = next;
1777    }
1778 }
1779
1780
1781 /**
1782  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1783  * which match the given context.
1784  */
1785 static void
1786 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1787 {
1788    struct st_context *st = (struct st_context *) userData;
1789    struct gl_shader *shader = (struct gl_shader *) data;
1790
1791    switch (shader->Type) {
1792    case GL_SHADER_PROGRAM_MESA:
1793       {
1794          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1795          GLuint i;
1796
1797          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1798             if (shProg->_LinkedShaders[i])
1799                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1800          }
1801       }
1802       break;
1803    case GL_VERTEX_SHADER:
1804    case GL_FRAGMENT_SHADER:
1805    case GL_GEOMETRY_SHADER:
1806    case GL_TESS_CONTROL_SHADER:
1807    case GL_TESS_EVALUATION_SHADER:
1808    case GL_COMPUTE_SHADER:
1809       break;
1810    default:
1811       assert(0);
1812    }
1813 }
1814
1815
1816 /**
1817  * Callback for _mesa_HashWalk.  Free all the program variants which match
1818  * the given context.
1819  */
1820 static void
1821 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1822 {
1823    struct st_context *st = (struct st_context *) userData;
1824    struct gl_program *program = (struct gl_program *) data;
1825    destroy_program_variants(st, program);
1826 }
1827
1828
1829 /**
1830  * Walk over all shaders and programs to delete any variants which
1831  * belong to the given context.
1832  * This is called during context tear-down.
1833  */
1834 void
1835 st_destroy_program_variants(struct st_context *st)
1836 {
1837    /* If shaders can be shared with other contexts, the last context will
1838     * call DeleteProgram on all shaders, releasing everything.
1839     */
1840    if (st->has_shareable_shaders)
1841       return;
1842
1843    /* ARB vert/frag program */
1844    _mesa_HashWalk(st->ctx->Shared->Programs,
1845                   destroy_program_variants_cb, st);
1846
1847    /* GLSL vert/frag/geom shaders */
1848    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1849                   destroy_shader_program_variants_cb, st);
1850 }
1851
1852
1853 /**
1854  * Compile one shader variant.
1855  */
1856 static void
1857 st_precompile_shader_variant(struct st_context *st,
1858                              struct gl_program *prog)
1859 {
1860    switch (prog->Target) {
1861    case GL_VERTEX_PROGRAM_ARB: {
1862       struct st_program *p = (struct st_program *)prog;
1863       struct st_common_variant_key key;
1864
1865       memset(&key, 0, sizeof(key));
1866
1867       key.st = st->has_shareable_shaders ? NULL : st;
1868       st_get_vp_variant(st, p, &key);
1869       break;
1870    }
1871
1872    case GL_FRAGMENT_PROGRAM_ARB: {
1873       struct st_program *p = (struct st_program *)prog;
1874       struct st_fp_variant_key key;
1875
1876       memset(&key, 0, sizeof(key));
1877
1878       key.st = st->has_shareable_shaders ? NULL : st;
1879       st_get_fp_variant(st, p, &key);
1880       break;
1881    }
1882
1883    case GL_TESS_CONTROL_PROGRAM_NV:
1884    case GL_TESS_EVALUATION_PROGRAM_NV:
1885    case GL_GEOMETRY_PROGRAM_NV:
1886    case GL_COMPUTE_PROGRAM_NV: {
1887       struct st_program *p = st_program(prog);
1888       struct st_common_variant_key key;
1889
1890       memset(&key, 0, sizeof(key));
1891
1892       key.st = st->has_shareable_shaders ? NULL : st;
1893       st_get_common_variant(st, p, &key);
1894       break;
1895    }
1896
1897    default:
1898       assert(0);
1899    }
1900 }
1901
1902 void
1903 st_finalize_program(struct st_context *st, struct gl_program *prog)
1904 {
1905    if (st->current_program[prog->info.stage] == prog) {
1906       if (prog->info.stage == MESA_SHADER_VERTEX)
1907          st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
1908       else
1909          st->dirty |= ((struct st_program *)prog)->affected_states;
1910    }
1911
1912    if (prog->nir)
1913       nir_sweep(prog->nir);
1914
1915    /* Create Gallium shaders now instead of on demand. */
1916    if (ST_DEBUG & DEBUG_PRECOMPILE ||
1917        st->shader_has_one_variant[prog->info.stage])
1918       st_precompile_shader_variant(st, prog);
1919 }