src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44 #include "compiler/nir/nir_serialize.h"
  45 #include "draw/draw_context.h"
  46
  47 #include "pipe/p_context.h"
  48 #include "pipe/p_defines.h"
  49 #include "pipe/p_shader_tokens.h"
  50 #include "draw/draw_context.h"
  51 #include "tgsi/tgsi_dump.h"
  52 #include "tgsi/tgsi_emulate.h"
  53 #include "tgsi/tgsi_parse.h"
  54 #include "tgsi/tgsi_ureg.h"
  55
  56 #include "util/u_memory.h"
  57
  58 #include "st_debug.h"
  59 #include "st_cb_bitmap.h"
  60 #include "st_cb_drawpixels.h"
  61 #include "st_context.h"
  62 #include "st_tgsi_lower_depth_clamp.h"
  63 #include "st_tgsi_lower_yuv.h"
  64 #include "st_program.h"
  65 #include "st_mesa_to_tgsi.h"
  66 #include "st_atifs_to_tgsi.h"
  67 #include "st_nir.h"
  68 #include "st_shader_cache.h"
  69 #include "st_util.h"
  70 #include "cso_cache/cso_context.h"
  71
  72
/* Forward declaration: st_release_program() below calls this before its
 * definition appears in the file.
 */
static void
destroy_program_variants(struct st_context *st, struct gl_program *target);
  75
  76 static void
  77 set_affected_state_flags(uint64_t *states,
  78                          struct gl_program *prog,
  79                          uint64_t new_constants,
  80                          uint64_t new_sampler_views,
  81                          uint64_t new_samplers,
  82                          uint64_t new_images,
  83                          uint64_t new_ubos,
  84                          uint64_t new_ssbos,
  85                          uint64_t new_atomics)
  86 {
  87    if (prog->Parameters->NumParameters)
  88       *states |= new_constants;
  89
  90    if (prog->info.num_textures)
  91       *states |= new_sampler_views | new_samplers;
  92
  93    if (prog->info.num_images)
  94       *states |= new_images;
  95
  96    if (prog->info.num_ubos)
  97       *states |= new_ubos;
  98
  99    if (prog->info.num_ssbos)
 100       *states |= new_ssbos;
 101
 102    if (prog->info.num_abos)
 103       *states |= new_atomics;
 104 }
 105
 106 /**
 107  * This determines which states will be updated when the shader is bound.
 108  */
 109 void
 110 st_set_prog_affected_state_flags(struct gl_program *prog)
 111 {
 112    uint64_t *states;
 113
 114    switch (prog->info.stage) {
 115    case MESA_SHADER_VERTEX:
 116       states = &((struct st_program*)prog)->affected_states;
 117
 118       *states = ST_NEW_VS_STATE |
 119                 ST_NEW_RASTERIZER |
 120                 ST_NEW_VERTEX_ARRAYS;
 121
 122       set_affected_state_flags(states, prog,
 123                                ST_NEW_VS_CONSTANTS,
 124                                ST_NEW_VS_SAMPLER_VIEWS,
 125                                ST_NEW_VS_SAMPLERS,
 126                                ST_NEW_VS_IMAGES,
 127                                ST_NEW_VS_UBOS,
 128                                ST_NEW_VS_SSBOS,
 129                                ST_NEW_VS_ATOMICS);
 130       break;
 131
 132    case MESA_SHADER_TESS_CTRL:
 133       states = &(st_program(prog))->affected_states;
 134
 135       *states = ST_NEW_TCS_STATE;
 136
 137       set_affected_state_flags(states, prog,
 138                                ST_NEW_TCS_CONSTANTS,
 139                                ST_NEW_TCS_SAMPLER_VIEWS,
 140                                ST_NEW_TCS_SAMPLERS,
 141                                ST_NEW_TCS_IMAGES,
 142                                ST_NEW_TCS_UBOS,
 143                                ST_NEW_TCS_SSBOS,
 144                                ST_NEW_TCS_ATOMICS);
 145       break;
 146
 147    case MESA_SHADER_TESS_EVAL:
 148       states = &(st_program(prog))->affected_states;
 149
 150       *states = ST_NEW_TES_STATE |
 151                 ST_NEW_RASTERIZER;
 152
 153       set_affected_state_flags(states, prog,
 154                                ST_NEW_TES_CONSTANTS,
 155                                ST_NEW_TES_SAMPLER_VIEWS,
 156                                ST_NEW_TES_SAMPLERS,
 157                                ST_NEW_TES_IMAGES,
 158                                ST_NEW_TES_UBOS,
 159                                ST_NEW_TES_SSBOS,
 160                                ST_NEW_TES_ATOMICS);
 161       break;
 162
 163    case MESA_SHADER_GEOMETRY:
 164       states = &(st_program(prog))->affected_states;
 165
 166       *states = ST_NEW_GS_STATE |
 167                 ST_NEW_RASTERIZER;
 168
 169       set_affected_state_flags(states, prog,
 170                                ST_NEW_GS_CONSTANTS,
 171                                ST_NEW_GS_SAMPLER_VIEWS,
 172                                ST_NEW_GS_SAMPLERS,
 173                                ST_NEW_GS_IMAGES,
 174                                ST_NEW_GS_UBOS,
 175                                ST_NEW_GS_SSBOS,
 176                                ST_NEW_GS_ATOMICS);
 177       break;
 178
 179    case MESA_SHADER_FRAGMENT:
 180       states = &((struct st_program*)prog)->affected_states;
 181
 182       /* gl_FragCoord and glDrawPixels always use constants. */
 183       *states = ST_NEW_FS_STATE |
 184                 ST_NEW_SAMPLE_SHADING |
 185                 ST_NEW_FS_CONSTANTS;
 186
 187       set_affected_state_flags(states, prog,
 188                                ST_NEW_FS_CONSTANTS,
 189                                ST_NEW_FS_SAMPLER_VIEWS,
 190                                ST_NEW_FS_SAMPLERS,
 191                                ST_NEW_FS_IMAGES,
 192                                ST_NEW_FS_UBOS,
 193                                ST_NEW_FS_SSBOS,
 194                                ST_NEW_FS_ATOMICS);
 195       break;
 196
 197    case MESA_SHADER_COMPUTE:
 198       states = &((struct st_program*)prog)->affected_states;
 199
 200       *states = ST_NEW_CS_STATE;
 201
 202       set_affected_state_flags(states, prog,
 203                                ST_NEW_CS_CONSTANTS,
 204                                ST_NEW_CS_SAMPLER_VIEWS,
 205                                ST_NEW_CS_SAMPLERS,
 206                                ST_NEW_CS_IMAGES,
 207                                ST_NEW_CS_UBOS,
 208                                ST_NEW_CS_SSBOS,
 209                                ST_NEW_CS_ATOMICS);
 210       break;
 211
 212    default:
 213       unreachable("unhandled shader stage");
 214    }
 215 }
 216
 217
 218 /**
 219  * Delete a shader variant.  Note the caller must unlink the variant from
 220  * the linked list.
 221  */
 222 static void
 223 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
 224 {
 225    if (v->driver_shader) {
 226       if (target == GL_VERTEX_PROGRAM_ARB &&
 227           ((struct st_common_variant*)v)->key.is_draw_shader) {
 228          /* Draw shader. */
 229          draw_delete_vertex_shader(st->draw, v->driver_shader);
 230       } else if (st->has_shareable_shaders || v->st == st) {
 231          /* The shader's context matches the calling context, or we
 232           * don't care.
 233           */
 234          switch (target) {
 235          case GL_VERTEX_PROGRAM_ARB:
 236             st->pipe->delete_vs_state(st->pipe, v->driver_shader);
 237             break;
 238          case GL_TESS_CONTROL_PROGRAM_NV:
 239             st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
 240             break;
 241          case GL_TESS_EVALUATION_PROGRAM_NV:
 242             st->pipe->delete_tes_state(st->pipe, v->driver_shader);
 243             break;
 244          case GL_GEOMETRY_PROGRAM_NV:
 245             st->pipe->delete_gs_state(st->pipe, v->driver_shader);
 246             break;
 247          case GL_FRAGMENT_PROGRAM_ARB:
 248             st->pipe->delete_fs_state(st->pipe, v->driver_shader);
 249             break;
 250          case GL_COMPUTE_PROGRAM_NV:
 251             st->pipe->delete_compute_state(st->pipe, v->driver_shader);
 252             break;
 253          default:
 254             unreachable("bad shader type in delete_basic_variant");
 255          }
 256       } else {
 257          /* We can't delete a shader with a context different from the one
 258           * that created it.  Add it to the creating context's zombie list.
 259           */
 260          enum pipe_shader_type type =
 261             pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
 262
 263          st_save_zombie_shader(v->st, type, v->driver_shader);
 264       }
 265    }
 266
 267    free(v);
 268 }
 269
 270 static void
 271 st_unbind_program(struct st_context *st, struct st_program *p)
 272 {
 273    /* Unbind the shader in cso_context and re-bind in st/mesa. */
 274    switch (p->Base.info.stage) {
 275    case MESA_SHADER_VERTEX:
 276       cso_set_vertex_shader_handle(st->cso_context, NULL);
 277       st->dirty |= ST_NEW_VS_STATE;
 278       break;
 279    case MESA_SHADER_TESS_CTRL:
 280       cso_set_tessctrl_shader_handle(st->cso_context, NULL);
 281       st->dirty |= ST_NEW_TCS_STATE;
 282       break;
 283    case MESA_SHADER_TESS_EVAL:
 284       cso_set_tesseval_shader_handle(st->cso_context, NULL);
 285       st->dirty |= ST_NEW_TES_STATE;
 286       break;
 287    case MESA_SHADER_GEOMETRY:
 288       cso_set_geometry_shader_handle(st->cso_context, NULL);
 289       st->dirty |= ST_NEW_GS_STATE;
 290       break;
 291    case MESA_SHADER_FRAGMENT:
 292       cso_set_fragment_shader_handle(st->cso_context, NULL);
 293       st->dirty |= ST_NEW_FS_STATE;
 294       break;
 295    case MESA_SHADER_COMPUTE:
 296       cso_set_compute_shader_handle(st->cso_context, NULL);
 297       st->dirty |= ST_NEW_CS_STATE;
 298       break;
 299    default:
 300       unreachable("invalid shader type");
 301    }
 302 }
 303
 304 /**
 305  * Free all basic program variants.
 306  */
 307 void
 308 st_release_variants(struct st_context *st, struct st_program *p)
 309 {
 310    struct st_variant *v;
 311
 312    /* If we are releasing shaders, re-bind them, because we don't
 313     * know which shaders are bound in the driver.
 314     */
 315    if (p->variants)
 316       st_unbind_program(st, p);
 317
 318    for (v = p->variants; v; ) {
 319       struct st_variant *next = v->next;
 320       delete_variant(st, v, p->Base.Target);
 321       v = next;
 322    }
 323
 324    p->variants = NULL;
 325
 326    if (p->state.tokens) {
 327       ureg_free_tokens(p->state.tokens);
 328       p->state.tokens = NULL;
 329    }
 330
 331    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 332     * it has resulted in the driver taking ownership of the NIR.  Those
 333     * callers should be NULLing out the nir field in any pipe_shader_state
 334     * that might have this called in order to indicate that.
 335     *
 336     * GLSL IR and ARB programs will have set gl_program->nir to the same
 337     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 338     */
 339 }
 340
 341 /**
 342  * Free all basic program variants and unref program.
 343  */
 344 void
 345 st_release_program(struct st_context *st, struct st_program **p)
 346 {
 347    if (!*p)
 348       return;
 349
 350    destroy_program_variants(st, &((*p)->Base));
 351    st_reference_prog(st, p, NULL);
 352 }
 353
 354 void
 355 st_finalize_nir_before_variants(struct nir_shader *nir)
 356 {
 357    NIR_PASS_V(nir, nir_opt_access);
 358
 359    NIR_PASS_V(nir, nir_split_var_copies);
 360    NIR_PASS_V(nir, nir_lower_var_copies);
 361    if (nir->options->lower_all_io_to_temps ||
 362        nir->options->lower_all_io_to_elements ||
 363        nir->info.stage == MESA_SHADER_VERTEX ||
 364        nir->info.stage == MESA_SHADER_GEOMETRY) {
 365       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 366    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 367       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 368    }
 369
 370    st_nir_assign_vs_in_locations(nir);
 371 }
 372
 373 /**
 374  * Translate ARB (asm) program to NIR
 375  */
 376 static nir_shader *
 377 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 378                          gl_shader_stage stage)
 379 {
 380    struct pipe_screen *screen = st->pipe->screen;
 381    const struct gl_shader_compiler_options *options =
 382       &st->ctx->Const.ShaderCompilerOptions[stage];
 383
 384    /* Translate to NIR */
 385    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 386    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 387    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 388
 389    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
 390    NIR_PASS_V(nir, nir_lower_system_values);
 391    NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
 392
 393    /* Optimise NIR */
 394    NIR_PASS_V(nir, nir_opt_constant_folding);
 395    st_nir_opts(nir);
 396    st_finalize_nir_before_variants(nir);
 397
 398    if (st->allow_st_finalize_nir_twice)
 399       st_finalize_nir(st, prog, NULL, nir, true);
 400
 401    nir_validate_shader(nir, "after st/glsl finalize_nir");
 402
 403    return nir;
 404 }
 405
 406 void
 407 st_prepare_vertex_program(struct st_program *stp)
 408 {
 409    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 410
 411    stvp->num_inputs = 0;
 412    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 413    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 414
 415    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 416     * and TGSI generic input indexes, plus input attrib semantic info.
 417     */
 418    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 419       if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 420          stvp->input_to_index[attr] = stvp->num_inputs;
 421          stvp->index_to_input[stvp->num_inputs] = attr;
 422          stvp->num_inputs++;
 423
 424          if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 425             /* add placeholder for second part of a double attribute */
 426             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 427             stvp->num_inputs++;
 428          }
 429       }
 430    }
 431    /* pre-setup potentially unused edgeflag input */
 432    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 433    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 434
 435    /* Compute mapping of vertex program outputs to slots. */
 436    unsigned num_outputs = 0;
 437    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 438       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 439          stvp->result_to_output[attr] = num_outputs++;
 440    }
 441    /* pre-setup potentially unused edgeflag output */
 442    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 443 }
 444
 445 void
 446 st_translate_stream_output_info(struct gl_program *prog)
 447 {
 448    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 449    if (!info)
 450       return;
 451
 452    /* Determine the (default) output register mapping for each output. */
 453    unsigned num_outputs = 0;
 454    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 455    memset(output_mapping, 0, sizeof(output_mapping));
 456
 457    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 458       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 459          output_mapping[attr] = num_outputs++;
 460    }
 461
 462    /* Translate stream output info. */
 463    struct pipe_stream_output_info *so_info =
 464       &((struct st_program*)prog)->state.stream_output;
 465
 466    for (unsigned i = 0; i < info->NumOutputs; i++) {
 467       so_info->output[i].register_index =
 468          output_mapping[info->Outputs[i].OutputRegister];
 469       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 470       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 471       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 472       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 473       so_info->output[i].stream = info->Outputs[i].StreamId;
 474    }
 475
 476    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 477       so_info->stride[i] = info->Buffers[i].Stride;
 478    }
 479    so_info->num_outputs = info->NumOutputs;
 480 }
 481
 482 /**
 483  * Translate a vertex program.
 484  */
 485 bool
 486 st_translate_vertex_program(struct st_context *st,
 487                             struct st_program *stp)
 488 {
 489    struct ureg_program *ureg;
 490    enum pipe_error error;
 491    unsigned num_outputs = 0;
 492    unsigned attr;
 493    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 494    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 495
 496    if (stp->Base.arb.IsPositionInvariant)
 497       _mesa_insert_mvp_code(st->ctx, &stp->Base);
 498
 499    /* ARB_vp: */
 500    if (!stp->glsl_to_tgsi) {
 501       _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);
 502
 503       /* This determines which states will be updated when the assembly
 504        * shader is bound.
 505        */
 506       stp->affected_states = ST_NEW_VS_STATE |
 507                               ST_NEW_RASTERIZER |
 508                               ST_NEW_VERTEX_ARRAYS;
 509
 510       if (stp->Base.Parameters->NumParameters)
 511          stp->affected_states |= ST_NEW_VS_CONSTANTS;
 512
 513       /* Translate to NIR if preferred. */
 514       if (PIPE_SHADER_IR_NIR ==
 515           st->pipe->screen->get_shader_param(st->pipe->screen,
 516                                              PIPE_SHADER_VERTEX,
 517                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 518          assert(!stp->glsl_to_tgsi);
 519
 520          if (stp->Base.nir)
 521             ralloc_free(stp->Base.nir);
 522
 523          if (stp->serialized_nir) {
 524             free(stp->serialized_nir);
 525             stp->serialized_nir = NULL;
 526          }
 527
 528          stp->state.type = PIPE_SHADER_IR_NIR;
 529          stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
 530                                                   MESA_SHADER_VERTEX);
 531
 532          /* We must update stp->Base.info after translation and before
 533           * st_prepare_vertex_program is called, because inputs_read
 534           * may become outdated after NIR optimization passes.
 535           *
 536           * For ffvp/ARB_vp inputs_read is populated based
 537           * on declared attributes without taking their usage into
 538           * consideration. When creating shader variants we expect
 539           * that their inputs_read would match the base ones for
 540           * input mapping to work properly.
 541           */
 542          nir_shader_gather_info(stp->Base.nir,
 543                                 nir_shader_get_entrypoint(stp->Base.nir));
 544          st_nir_assign_vs_in_locations(stp->Base.nir);
 545          stp->Base.info = stp->Base.nir->info;
 546
 547          /* For st_draw_feedback, we need to generate TGSI too if draw doesn't
 548           * use LLVM.
 549           */
 550          /* TODO: Draw can't handle lowered IO. */
 551          if (draw_has_llvm() && !stp->Base.info.io_lowered) {
 552             st_prepare_vertex_program(stp);
 553             return true;
 554          }
 555       }
 556    }
 557
 558    st_prepare_vertex_program(stp);
 559
 560    /* Get semantic names and indices. */
 561    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 562       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 563          unsigned slot = num_outputs++;
 564          unsigned semantic_name, semantic_index;
 565          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 566                                       &semantic_name, &semantic_index);
 567          output_semantic_name[slot] = semantic_name;
 568          output_semantic_index[slot] = semantic_index;
 569       }
 570    }
 571    /* pre-setup potentially unused edgeflag output */
 572    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 573    output_semantic_index[num_outputs] = 0;
 574
 575    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 576    if (ureg == NULL)
 577       return false;
 578
 579    ureg_setup_shader_info(ureg, &stp->Base.info);
 580
 581    if (ST_DEBUG & DEBUG_MESA) {
 582       _mesa_print_program(&stp->Base);
 583       _mesa_print_program_parameters(st->ctx, &stp->Base);
 584       debug_printf("\n");
 585    }
 586
 587    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 588
 589    if (stp->glsl_to_tgsi) {
 590       error = st_translate_program(st->ctx,
 591                                    PIPE_SHADER_VERTEX,
 592                                    ureg,
 593                                    stp->glsl_to_tgsi,
 594                                    &stp->Base,
 595                                    /* inputs */
 596                                    stvp->num_inputs,
 597                                    stvp->input_to_index,
 598                                    NULL, /* inputSlotToAttr */
 599                                    NULL, /* input semantic name */
 600                                    NULL, /* input semantic index */
 601                                    NULL, /* interp mode */
 602                                    /* outputs */
 603                                    num_outputs,
 604                                    stvp->result_to_output,
 605                                    output_semantic_name,
 606                                    output_semantic_index);
 607
 608       st_translate_stream_output_info(&stp->Base);
 609
 610       free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
 611    } else
 612       error = st_translate_mesa_program(st->ctx,
 613                                         PIPE_SHADER_VERTEX,
 614                                         ureg,
 615                                         &stp->Base,
 616                                         /* inputs */
 617                                         stvp->num_inputs,
 618                                         stvp->input_to_index,
 619                                         NULL, /* input semantic name */
 620                                         NULL, /* input semantic index */
 621                                         NULL,
 622                                         /* outputs */
 623                                         num_outputs,
 624                                         stvp->result_to_output,
 625                                         output_semantic_name,
 626                                         output_semantic_index);
 627
 628    if (error) {
 629       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 630       _mesa_print_program(&stp->Base);
 631       debug_assert(0);
 632       return false;
 633    }
 634
 635    stp->state.tokens = ureg_get_tokens(ureg, NULL);
 636    ureg_destroy(ureg);
 637
 638    if (stp->glsl_to_tgsi) {
 639       stp->glsl_to_tgsi = NULL;
 640       st_store_ir_in_disk_cache(st, &stp->Base, false);
 641    }
 642
 643    return stp->state.tokens != NULL;
 644 }
 645
 646 static struct nir_shader *
 647 get_nir_shader(struct st_context *st, struct st_program *stp)
 648 {
 649    if (stp->Base.nir) {
 650       nir_shader *nir = stp->Base.nir;
 651
 652       /* The first shader variant takes ownership of NIR, so that there is
 653        * no cloning. Additional shader variants are always generated from
 654        * serialized NIR to save memory.
 655        */
 656       stp->Base.nir = NULL;
 657       assert(stp->serialized_nir && stp->serialized_nir_size);
 658       return nir;
 659    }
 660
 661    struct blob_reader blob_reader;
 662    const struct nir_shader_compiler_options *options =
 663       st->ctx->Const.ShaderCompilerOptions[stp->Base.info.stage].NirOptions;
 664
 665    blob_reader_init(&blob_reader, stp->serialized_nir, stp->serialized_nir_size);
 666    return nir_deserialize(NULL, options, &blob_reader);
 667 }
 668
 669 static void
 670 lower_ucp(struct st_context *st,
 671           struct nir_shader *nir,
 672           unsigned ucp_enables,
 673           struct gl_program_parameter_list *params)
 674 {
 675    if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)
 676       NIR_PASS_V(nir, nir_lower_clip_disable, ucp_enables);
 677    else {
 678       struct pipe_screen *screen = st->pipe->screen;
 679       bool can_compact = screen->get_param(screen,
 680                                            PIPE_CAP_NIR_COMPACT_ARRAYS);
 681       bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 682
 683       gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 684       for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 685          if (use_eye) {
 686             clipplane_state[i][0] = STATE_CLIPPLANE;
 687             clipplane_state[i][1] = i;
 688          } else {
 689             clipplane_state[i][0] = STATE_INTERNAL;
 690             clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 691             clipplane_state[i][2] = i;
 692          }
 693          _mesa_add_state_reference(params, clipplane_state[i]);
 694       }
 695
 696       if (nir->info.stage == MESA_SHADER_VERTEX) {
 697          NIR_PASS_V(nir, nir_lower_clip_vs, ucp_enables,
 698                     true, can_compact, clipplane_state);
 699       } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
 700          NIR_PASS_V(nir, nir_lower_clip_gs, ucp_enables,
 701                     can_compact, clipplane_state);
 702       }
 703
 704       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
 705                  nir_shader_get_entrypoint(nir), true, false);
 706       NIR_PASS_V(nir, nir_lower_global_vars_to_local);
 707    }
 708 }
 709
/* State tokens naming the depth range; the unlisted trailing tokens are
 * zero.  Referenced by st_create_vp_variant() when lowering depth clamp.
 */
static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };
 712
 713 static struct st_common_variant *
 714 st_create_vp_variant(struct st_context *st,
 715                      struct st_program *stvp,
 716                      const struct st_common_variant_key *key)
 717 {
 718    struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
 719    struct pipe_context *pipe = st->pipe;
 720    struct pipe_shader_state state = {0};
 721
 722    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 723       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 724    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 725
 726    vpv->key = *key;
 727
 728    state.stream_output = stvp->state.stream_output;
 729
 730    if (stvp->state.type == PIPE_SHADER_IR_NIR &&
 731        (!key->is_draw_shader ||
 732         /* TODO: Draw can't handle lowered IO. */
 733         (draw_has_llvm() && !stvp->Base.info.io_lowered))) {
 734       bool finalize = false;
 735
 736       state.type = PIPE_SHADER_IR_NIR;
 737       state.ir.nir = get_nir_shader(st, stvp);
 738       if (key->clamp_color) {
 739          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 740          finalize = true;
 741       }
 742       if (key->passthrough_edgeflags) {
 743          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 744          finalize = true;
 745       }
 746
 747       if (key->lower_point_size) {
 748          _mesa_add_state_reference(params, point_size_state);
 749          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 750                     point_size_state);
 751          finalize = true;
 752       }
 753
 754       if (key->lower_ucp) {
 755          lower_ucp(st, state.ir.nir, key->lower_ucp, params);
 756          finalize = true;
 757       }
 758
 759       if (finalize || !st->allow_st_finalize_nir_twice) {
 760          st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
 761                          true);
 762
 763          /* Some of the lowering above may have introduced new varyings */
 764          nir_shader_gather_info(state.ir.nir,
 765                                 nir_shader_get_entrypoint(state.ir.nir));
 766       }
 767
 768       if (ST_DEBUG & DEBUG_PRINT_IR)
 769          nir_print_shader(state.ir.nir, stderr);
 770
 771       if (key->is_draw_shader)
 772          vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 773       else
 774          vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 775
 776       return vpv;
 777    }
 778
 779    state.type = PIPE_SHADER_IR_TGSI;
 780    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 781
 782    /* Emulate features. */
 783    if (key->clamp_color || key->passthrough_edgeflags) {
 784       const struct tgsi_token *tokens;
 785       unsigned flags =
 786          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 787          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 788
 789       tokens = tgsi_emulate(state.tokens, flags);
 790
 791       if (tokens) {
 792          tgsi_free_tokens(state.tokens);
 793          state.tokens = tokens;
 794       } else {
 795          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 796       }
 797    }
 798
 799    if (key->lower_depth_clamp) {
 800       unsigned depth_range_const =
 801             _mesa_add_state_reference(params, depth_range_state);
 802
 803       const struct tgsi_token *tokens;
 804       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 805                                          key->clip_negative_one_to_one);
 806       if (tokens != state.tokens)
 807          tgsi_free_tokens(state.tokens);
 808       state.tokens = tokens;
 809    }
 810
 811    if (ST_DEBUG & DEBUG_PRINT_IR)
 812       tgsi_dump(state.tokens, 0);
 813
 814    if (key->is_draw_shader)
 815       vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 816    else
 817       vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 818
 819    if (state.tokens) {
 820       tgsi_free_tokens(state.tokens);
 821    }
 822
 823    return vpv;
 824 }
 825
 826
 827 /**
 828  * Find/create a vertex program variant.
 829  */
 830 struct st_common_variant *
 831 st_get_vp_variant(struct st_context *st,
 832                   struct st_program *stp,
 833                   const struct st_common_variant_key *key)
 834 {
 835    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 836    struct st_common_variant *vpv;
 837
 838    /* Search for existing variant */
 839    for (vpv = st_common_variant(stp->variants); vpv;
 840         vpv = st_common_variant(vpv->base.next)) {
 841       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 842          break;
 843       }
 844    }
 845
 846    if (!vpv) {
 847       /* create now */
 848       vpv = st_create_vp_variant(st, stp, key);
 849       if (vpv) {
 850          vpv->base.st = key->st;
 851
 852          unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
 853          for (unsigned index = 0; index < num_inputs; ++index) {
 854             unsigned attr = stvp->index_to_input[index];
 855             if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 856                continue;
 857             vpv->vert_attrib_mask |= 1u << attr;
 858          }
 859
 860          /* insert into list */
 861          vpv->base.next = stp->variants;
 862          stp->variants = &vpv->base;
 863       }
 864    }
 865
 866    return vpv;
 867 }
 868
 869
 870 /**
 871  * Translate a Mesa fragment shader into a TGSI shader.
 872  */
 873 bool
 874 st_translate_fragment_program(struct st_context *st,
 875                               struct st_program *stfp)
 876 {
 877    /* Non-GLSL programs: */
 878    if (!stfp->glsl_to_tgsi) {
 879       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 880       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 881          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 882
 883       /* This determines which states will be updated when the assembly
 884        * shader is bound.
 885        *
 886        * fragment.position and glDrawPixels always use constants.
 887        */
 888       stfp->affected_states = ST_NEW_FS_STATE |
 889                               ST_NEW_SAMPLE_SHADING |
 890                               ST_NEW_FS_CONSTANTS;
 891
 892       if (stfp->ati_fs) {
 893          /* Just set them for ATI_fs unconditionally. */
 894          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 895                                   ST_NEW_FS_SAMPLERS;
 896       } else {
 897          /* ARB_fp */
 898          if (stfp->Base.SamplersUsed)
 899             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 900                                      ST_NEW_FS_SAMPLERS;
 901       }
 902
 903       /* Translate to NIR. */
 904       if (!stfp->ati_fs &&
 905           PIPE_SHADER_IR_NIR ==
 906           st->pipe->screen->get_shader_param(st->pipe->screen,
 907                                              PIPE_SHADER_FRAGMENT,
 908                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 909          nir_shader *nir =
 910             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 911
 912          if (stfp->Base.nir)
 913             ralloc_free(stfp->Base.nir);
 914          if (stfp->serialized_nir) {
 915             free(stfp->serialized_nir);
 916             stfp->serialized_nir = NULL;
 917          }
 918          stfp->state.type = PIPE_SHADER_IR_NIR;
 919          stfp->Base.nir = nir;
 920          return true;
 921       }
 922    }
 923
 924    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 925    ubyte inputMapping[VARYING_SLOT_MAX];
 926    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 927    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 928    GLuint attr;
 929    GLbitfield64 inputsRead;
 930    struct ureg_program *ureg;
 931
 932    GLboolean write_all = GL_FALSE;
 933
 934    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 935    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 936    uint fs_num_inputs = 0;
 937
 938    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 939    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 940    uint fs_num_outputs = 0;
 941
 942    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 943
 944    /*
 945     * Convert Mesa program inputs to TGSI input register semantics.
 946     */
 947    inputsRead = stfp->Base.info.inputs_read;
 948    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 949       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 950          const GLuint slot = fs_num_inputs++;
 951
 952          inputMapping[attr] = slot;
 953          inputSlotToAttr[slot] = attr;
 954
 955          switch (attr) {
 956          case VARYING_SLOT_POS:
 957             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 958             input_semantic_index[slot] = 0;
 959             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 960             break;
 961          case VARYING_SLOT_COL0:
 962             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 963             input_semantic_index[slot] = 0;
 964             interpMode[slot] = stfp->glsl_to_tgsi ?
 965                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 966             break;
 967          case VARYING_SLOT_COL1:
 968             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 969             input_semantic_index[slot] = 1;
 970             interpMode[slot] = stfp->glsl_to_tgsi ?
 971                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 972             break;
 973          case VARYING_SLOT_FOGC:
 974             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 975             input_semantic_index[slot] = 0;
 976             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 977             break;
 978          case VARYING_SLOT_FACE:
 979             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 980             input_semantic_index[slot] = 0;
 981             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 982             break;
 983          case VARYING_SLOT_PRIMITIVE_ID:
 984             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 985             input_semantic_index[slot] = 0;
 986             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 987             break;
 988          case VARYING_SLOT_LAYER:
 989             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 990             input_semantic_index[slot] = 0;
 991             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 992             break;
 993          case VARYING_SLOT_VIEWPORT:
 994             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 995             input_semantic_index[slot] = 0;
 996             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 997             break;
 998          case VARYING_SLOT_CLIP_DIST0:
 999             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
1000             input_semantic_index[slot] = 0;
1001             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
1002             break;
1003          case VARYING_SLOT_CLIP_DIST1:
1004             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
1005             input_semantic_index[slot] = 1;
1006             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
1007             break;
1008          case VARYING_SLOT_CULL_DIST0:
1009          case VARYING_SLOT_CULL_DIST1:
1010             /* these should have been lowered by GLSL */
1011             assert(0);
1012             break;
1013             /* In most cases, there is nothing special about these
1014              * inputs, so adopt a convention to use the generic
1015              * semantic name and the mesa VARYING_SLOT_ number as the
1016              * index.
1017              *
1018              * All that is required is that the vertex shader labels
1019              * its own outputs similarly, and that the vertex shader
1020              * generates at least every output required by the
1021              * fragment shader plus fixed-function hardware (such as
1022              * BFC).
1023              *
1024              * However, some drivers may need us to identify the PNTC and TEXi
1025              * varyings if, for example, their capability to replace them with
1026              * sprite coordinates is limited.
1027              */
1028          case VARYING_SLOT_PNTC:
1029             if (st->needs_texcoord_semantic) {
1030                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
1031                input_semantic_index[slot] = 0;
1032                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1033                break;
1034             }
1035             /* fall through */
1036          case VARYING_SLOT_TEX0:
1037          case VARYING_SLOT_TEX1:
1038          case VARYING_SLOT_TEX2:
1039          case VARYING_SLOT_TEX3:
1040          case VARYING_SLOT_TEX4:
1041          case VARYING_SLOT_TEX5:
1042          case VARYING_SLOT_TEX6:
1043          case VARYING_SLOT_TEX7:
1044             if (st->needs_texcoord_semantic) {
1045                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
1046                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
1047                interpMode[slot] = stfp->glsl_to_tgsi ?
1048                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1049                break;
1050             }
1051             /* fall through */
1052          case VARYING_SLOT_VAR0:
1053          default:
1054             /* Semantic indices should be zero-based because drivers may choose
1055              * to assign a fixed slot determined by that index.
1056              * This is useful because ARB_separate_shader_objects uses location
1057              * qualifiers for linkage, and if the semantic index corresponds to
1058              * these locations, linkage passes in the driver become unecessary.
1059              *
1060              * If needs_texcoord_semantic is true, no semantic indices will be
1061              * consumed for the TEXi varyings, and we can base the locations of
1062              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
1063              */
1064             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
1065                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
1066             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
1067             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
1068             if (attr == VARYING_SLOT_PNTC)
1069                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1070             else {
1071                interpMode[slot] = stfp->glsl_to_tgsi ?
1072                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1073             }
1074             break;
1075          }
1076       }
1077       else {
1078          inputMapping[attr] = -1;
1079       }
1080    }
1081
1082    /*
1083     * Semantics and mapping for outputs
1084     */
1085    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
1086
1087    /* if z is written, emit that first */
1088    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
1089       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
1090       fs_output_semantic_index[fs_num_outputs] = 0;
1091       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
1092       fs_num_outputs++;
1093       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
1094    }
1095
1096    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
1097       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
1098       fs_output_semantic_index[fs_num_outputs] = 0;
1099       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
1100       fs_num_outputs++;
1101       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
1102    }
1103
1104    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
1105       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
1106       fs_output_semantic_index[fs_num_outputs] = 0;
1107       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
1108       fs_num_outputs++;
1109       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
1110    }
1111
1112    /* handle remaining outputs (color) */
1113    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
1114       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
1115          stfp->Base.SecondaryOutputsWritten;
1116       const unsigned loc = attr % FRAG_RESULT_MAX;
1117
1118       if (written & BITFIELD64_BIT(loc)) {
1119          switch (loc) {
1120          case FRAG_RESULT_DEPTH:
1121          case FRAG_RESULT_STENCIL:
1122          case FRAG_RESULT_SAMPLE_MASK:
1123             /* handled above */
1124             assert(0);
1125             break;
1126          case FRAG_RESULT_COLOR:
1127             write_all = GL_TRUE; /* fallthrough */
1128          default: {
1129             int index;
1130             assert(loc == FRAG_RESULT_COLOR ||
1131                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1132
1133             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1134
1135             if (attr >= FRAG_RESULT_MAX) {
1136                /* Secondary color for dual source blending. */
1137                assert(index == 0);
1138                index++;
1139             }
1140
1141             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1142             fs_output_semantic_index[fs_num_outputs] = index;
1143             outputMapping[attr] = fs_num_outputs;
1144             break;
1145          }
1146          }
1147
1148          fs_num_outputs++;
1149       }
1150    }
1151
1152    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1153    if (ureg == NULL)
1154       return false;
1155
1156    ureg_setup_shader_info(ureg, &stfp->Base.info);
1157
1158    if (ST_DEBUG & DEBUG_MESA) {
1159       _mesa_print_program(&stfp->Base);
1160       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1161       debug_printf("\n");
1162    }
1163    if (write_all == GL_TRUE)
1164       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1165
1166    if (stfp->glsl_to_tgsi) {
1167       st_translate_program(st->ctx,
1168                            PIPE_SHADER_FRAGMENT,
1169                            ureg,
1170                            stfp->glsl_to_tgsi,
1171                            &stfp->Base,
1172                            /* inputs */
1173                            fs_num_inputs,
1174                            inputMapping,
1175                            inputSlotToAttr,
1176                            input_semantic_name,
1177                            input_semantic_index,
1178                            interpMode,
1179                            /* outputs */
1180                            fs_num_outputs,
1181                            outputMapping,
1182                            fs_output_semantic_name,
1183                            fs_output_semantic_index);
1184
1185       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1186    } else if (stfp->ati_fs)
1187       st_translate_atifs_program(ureg,
1188                                  stfp->ati_fs,
1189                                  &stfp->Base,
1190                                  /* inputs */
1191                                  fs_num_inputs,
1192                                  inputMapping,
1193                                  input_semantic_name,
1194                                  input_semantic_index,
1195                                  interpMode,
1196                                  /* outputs */
1197                                  fs_num_outputs,
1198                                  outputMapping,
1199                                  fs_output_semantic_name,
1200                                  fs_output_semantic_index);
1201    else
1202       st_translate_mesa_program(st->ctx,
1203                                 PIPE_SHADER_FRAGMENT,
1204                                 ureg,
1205                                 &stfp->Base,
1206                                 /* inputs */
1207                                 fs_num_inputs,
1208                                 inputMapping,
1209                                 input_semantic_name,
1210                                 input_semantic_index,
1211                                 interpMode,
1212                                 /* outputs */
1213                                 fs_num_outputs,
1214                                 outputMapping,
1215                                 fs_output_semantic_name,
1216                                 fs_output_semantic_index);
1217
1218    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1219    ureg_destroy(ureg);
1220
1221    if (stfp->glsl_to_tgsi) {
1222       stfp->glsl_to_tgsi = NULL;
1223       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1224    }
1225
1226    return stfp->state.tokens != NULL;
1227 }
1228
1229 static struct st_fp_variant *
1230 st_create_fp_variant(struct st_context *st,
1231                      struct st_program *stfp,
1232                      const struct st_fp_variant_key *key)
1233 {
1234    struct pipe_context *pipe = st->pipe;
1235    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1236    struct pipe_shader_state state = {0};
1237    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1238    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1239       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1240    static const gl_state_index16 scale_state[STATE_LENGTH] =
1241       { STATE_INTERNAL, STATE_PT_SCALE };
1242    static const gl_state_index16 bias_state[STATE_LENGTH] =
1243       { STATE_INTERNAL, STATE_PT_BIAS };
1244    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1245       { STATE_INTERNAL, STATE_ALPHA_REF };
1246
1247    if (!variant)
1248       return NULL;
1249
1250    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1251       bool finalize = false;
1252
1253       state.type = PIPE_SHADER_IR_NIR;
1254       state.ir.nir = get_nir_shader(st, stfp);
1255
1256       if (key->clamp_color) {
1257          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1258          finalize = true;
1259       }
1260
1261       if (key->lower_flatshade) {
1262          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1263          finalize = true;
1264       }
1265
1266       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1267          _mesa_add_state_reference(params, alpha_ref_state);
1268          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1269                     false, alpha_ref_state);
1270          finalize = true;
1271       }
1272
1273       if (key->lower_two_sided_color) {
1274          bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal;
1275          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color, face_sysval);
1276          finalize = true;
1277       }
1278
1279       if (key->persample_shading) {
1280           nir_shader *shader = state.ir.nir;
1281           nir_foreach_shader_in_variable(var, shader)
1282              var->data.sample = true;
1283           finalize = true;
1284       }
1285
1286       assert(!(key->bitmap && key->drawpixels));
1287
1288       /* glBitmap */
1289       if (key->bitmap) {
1290          nir_lower_bitmap_options options = {0};
1291
1292          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1293          options.sampler = variant->bitmap_sampler;
1294          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1295
1296          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1297          finalize = true;
1298       }
1299
1300       /* glDrawPixels (color only) */
1301       if (key->drawpixels) {
1302          nir_lower_drawpixels_options options = {{0}};
1303          unsigned samplers_used = stfp->Base.SamplersUsed;
1304
1305          /* Find the first unused slot. */
1306          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1307          options.drawpix_sampler = variant->drawpix_sampler;
1308          samplers_used |= (1 << variant->drawpix_sampler);
1309
1310          options.pixel_maps = key->pixelMaps;
1311          if (key->pixelMaps) {
1312             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1313             options.pixelmap_sampler = variant->pixelmap_sampler;
1314          }
1315
1316          options.scale_and_bias = key->scaleAndBias;
1317          if (key->scaleAndBias) {
1318             _mesa_add_state_reference(params, scale_state);
1319             memcpy(options.scale_state_tokens, scale_state,
1320                    sizeof(options.scale_state_tokens));
1321             _mesa_add_state_reference(params, bias_state);
1322             memcpy(options.bias_state_tokens, bias_state,
1323                    sizeof(options.bias_state_tokens));
1324          }
1325
1326          _mesa_add_state_reference(params, texcoord_state);
1327          memcpy(options.texcoord_state_tokens, texcoord_state,
1328                 sizeof(options.texcoord_state_tokens));
1329
1330          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1331          finalize = true;
1332       }
1333
1334       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1335                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1336                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1337
1338          st_nir_lower_samplers(pipe->screen, state.ir.nir,
1339                                stfp->shader_program, &stfp->Base);
1340
1341          nir_lower_tex_options options = {0};
1342          options.lower_y_uv_external = key->external.lower_nv12;
1343          options.lower_y_u_v_external = key->external.lower_iyuv;
1344          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1345          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1346          options.lower_ayuv_external = key->external.lower_ayuv;
1347          options.lower_xyuv_external = key->external.lower_xyuv;
1348          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1349          finalize = true;
1350       }
1351
1352       if (finalize || !st->allow_st_finalize_nir_twice) {
1353          st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
1354                          false);
1355       }
1356
1357       /* This pass needs to happen *after* nir_lower_sampler */
1358       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1359                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1360                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1361          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1362                     ~stfp->Base.SamplersUsed,
1363                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1364                        key->external.lower_yx_xuxv,
1365                     key->external.lower_iyuv);
1366          finalize = true;
1367       }
1368
1369       if (finalize || !st->allow_st_finalize_nir_twice) {
1370          /* Some of the lowering above may have introduced new varyings */
1371          nir_shader_gather_info(state.ir.nir,
1372                                 nir_shader_get_entrypoint(state.ir.nir));
1373
1374          struct pipe_screen *screen = pipe->screen;
1375          if (screen->finalize_nir)
1376             screen->finalize_nir(screen, state.ir.nir, false);
1377       }
1378
1379       if (ST_DEBUG & DEBUG_PRINT_IR)
1380          nir_print_shader(state.ir.nir, stderr);
1381
1382       variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1383       variant->key = *key;
1384
1385       return variant;
1386    }
1387
1388    state.tokens = stfp->state.tokens;
1389
1390    assert(!(key->bitmap && key->drawpixels));
1391
1392    /* Fix texture targets and add fog for ATI_fs */
1393    if (stfp->ati_fs) {
1394       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1395
1396       if (tokens)
1397          state.tokens = tokens;
1398       else
1399          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1400    }
1401
1402    /* Emulate features. */
1403    if (key->clamp_color || key->persample_shading) {
1404       const struct tgsi_token *tokens;
1405       unsigned flags =
1406          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1407          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1408
1409       tokens = tgsi_emulate(state.tokens, flags);
1410
1411       if (tokens) {
1412          if (state.tokens != stfp->state.tokens)
1413             tgsi_free_tokens(state.tokens);
1414          state.tokens = tokens;
1415       } else
1416          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1417    }
1418
1419    /* glBitmap */
1420    if (key->bitmap) {
1421       const struct tgsi_token *tokens;
1422
1423       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1424
1425       tokens = st_get_bitmap_shader(state.tokens,
1426                                     st->internal_target,
1427                                     variant->bitmap_sampler,
1428                                     st->needs_texcoord_semantic,
1429                                     st->bitmap.tex_format ==
1430                                     PIPE_FORMAT_R8_UNORM);
1431
1432       if (tokens) {
1433          if (state.tokens != stfp->state.tokens)
1434             tgsi_free_tokens(state.tokens);
1435          state.tokens = tokens;
1436       } else
1437          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1438    }
1439
1440    /* glDrawPixels (color only) */
1441    if (key->drawpixels) {
1442       const struct tgsi_token *tokens;
1443       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1444
1445       /* Find the first unused slot. */
1446       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1447
1448       if (key->pixelMaps) {
1449          unsigned samplers_used = stfp->Base.SamplersUsed |
1450                                   (1 << variant->drawpix_sampler);
1451
1452          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1453       }
1454
1455       if (key->scaleAndBias) {
1456          scale_const = _mesa_add_state_reference(params, scale_state);
1457          bias_const = _mesa_add_state_reference(params, bias_state);
1458       }
1459
1460       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1461
1462       tokens = st_get_drawpix_shader(state.tokens,
1463                                      st->needs_texcoord_semantic,
1464                                      key->scaleAndBias, scale_const,
1465                                      bias_const, key->pixelMaps,
1466                                      variant->drawpix_sampler,
1467                                      variant->pixelmap_sampler,
1468                                      texcoord_const, st->internal_target);
1469
1470       if (tokens) {
1471          if (state.tokens != stfp->state.tokens)
1472             tgsi_free_tokens(state.tokens);
1473          state.tokens = tokens;
1474       } else
1475          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1476    }
1477
1478    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1479                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1480       const struct tgsi_token *tokens;
1481
1482       /* samplers inserted would conflict, but this should be unpossible: */
1483       assert(!(key->bitmap || key->drawpixels));
1484
1485       tokens = st_tgsi_lower_yuv(state.tokens,
1486                                  ~stfp->Base.SamplersUsed,
1487                                  key->external.lower_nv12 ||
1488                                     key->external.lower_xy_uxvx ||
1489                                     key->external.lower_yx_xuxv,
1490                                  key->external.lower_iyuv);
1491       if (tokens) {
1492          if (state.tokens != stfp->state.tokens)
1493             tgsi_free_tokens(state.tokens);
1494          state.tokens = tokens;
1495       } else {
1496          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1497       }
1498    }
1499
1500    if (key->lower_depth_clamp) {
1501       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1502
1503       const struct tgsi_token *tokens;
1504       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1505       if (state.tokens != stfp->state.tokens)
1506          tgsi_free_tokens(state.tokens);
1507       state.tokens = tokens;
1508    }
1509
1510    if (ST_DEBUG & DEBUG_PRINT_IR)
1511       tgsi_dump(state.tokens, 0);
1512
1513    /* fill in variant */
1514    variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1515    variant->key = *key;
1516
1517    if (state.tokens != stfp->state.tokens)
1518       tgsi_free_tokens(state.tokens);
1519    return variant;
1520 }
1521
1522 /**
1523  * Translate fragment program if needed.
1524  */
1525 struct st_fp_variant *
1526 st_get_fp_variant(struct st_context *st,
1527                   struct st_program *stfp,
1528                   const struct st_fp_variant_key *key)
1529 {
1530    struct st_fp_variant *fpv;
1531
1532    /* Search for existing variant */
1533    for (fpv = st_fp_variant(stfp->variants); fpv;
1534         fpv = st_fp_variant(fpv->base.next)) {
1535       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1536          break;
1537       }
1538    }
1539
1540    if (!fpv) {
1541       /* create new */
1542       fpv = st_create_fp_variant(st, stfp, key);
1543       if (fpv) {
1544          fpv->base.st = key->st;
1545
1546          if (key->bitmap || key->drawpixels) {
1547             /* Regular variants should always come before the
1548              * bitmap & drawpixels variants, (unless there
1549              * are no regular variants) so that
1550              * st_update_fp can take a fast path when
1551              * shader_has_one_variant is set.
1552              */
1553             if (!stfp->variants) {
1554                stfp->variants = &fpv->base;
1555             } else {
1556                /* insert into list after the first one */
1557                fpv->base.next = stfp->variants->next;
1558                stfp->variants->next = &fpv->base;
1559             }
1560          } else {
1561             /* insert into list */
1562             fpv->base.next = stfp->variants;
1563             stfp->variants = &fpv->base;
1564          }
1565       }
1566    }
1567
1568    return fpv;
1569 }
1570
1571 /**
1572  * Translate a program. This is common code for geometry and tessellation
1573  * shaders.
1574  */
1575 bool
1576 st_translate_common_program(struct st_context *st,
1577                             struct st_program *stp)
1578 {
1579    struct gl_program *prog = &stp->Base;
1580    enum pipe_shader_type stage =
1581       pipe_shader_type_from_mesa(stp->Base.info.stage);
1582    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1583
1584    if (ureg == NULL)
1585       return false;
1586
1587    ureg_setup_shader_info(ureg, &stp->Base.info);
1588
1589    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1590    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1591    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1592    GLuint attr;
1593
1594    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1595    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1596    uint num_inputs = 0;
1597
1598    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1599    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1600    uint num_outputs = 0;
1601
1602    GLint i;
1603
1604    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1605    memset(inputMapping, 0, sizeof(inputMapping));
1606    memset(outputMapping, 0, sizeof(outputMapping));
1607    memset(&stp->state, 0, sizeof(stp->state));
1608
1609    /*
1610     * Convert Mesa program inputs to TGSI input register semantics.
1611     */
1612    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1613       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1614          continue;
1615
1616       unsigned slot = num_inputs++;
1617
1618       inputMapping[attr] = slot;
1619       inputSlotToAttr[slot] = attr;
1620
1621       unsigned semantic_name, semantic_index;
1622       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1623                                    &semantic_name, &semantic_index);
1624       input_semantic_name[slot] = semantic_name;
1625       input_semantic_index[slot] = semantic_index;
1626    }
1627
1628    /* Also add patch inputs. */
1629    for (attr = 0; attr < 32; attr++) {
1630       if (prog->info.patch_inputs_read & (1u << attr)) {
1631          GLuint slot = num_inputs++;
1632          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1633
1634          inputMapping[patch_attr] = slot;
1635          inputSlotToAttr[slot] = patch_attr;
1636          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1637          input_semantic_index[slot] = attr;
1638       }
1639    }
1640
1641    /* initialize output semantics to defaults */
1642    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1643       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1644       output_semantic_index[i] = 0;
1645    }
1646
1647    /*
1648     * Determine number of outputs, the (default) output register
1649     * mapping and the semantic information for each output.
1650     */
1651    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1652       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1653          GLuint slot = num_outputs++;
1654
1655          outputMapping[attr] = slot;
1656
1657          unsigned semantic_name, semantic_index;
1658          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1659                                       &semantic_name, &semantic_index);
1660          output_semantic_name[slot] = semantic_name;
1661          output_semantic_index[slot] = semantic_index;
1662       }
1663    }
1664
1665    /* Also add patch outputs. */
1666    for (attr = 0; attr < 32; attr++) {
1667       if (prog->info.patch_outputs_written & (1u << attr)) {
1668          GLuint slot = num_outputs++;
1669          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1670
1671          outputMapping[patch_attr] = slot;
1672          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1673          output_semantic_index[slot] = attr;
1674       }
1675    }
1676
1677    st_translate_program(st->ctx,
1678                         stage,
1679                         ureg,
1680                         stp->glsl_to_tgsi,
1681                         prog,
1682                         /* inputs */
1683                         num_inputs,
1684                         inputMapping,
1685                         inputSlotToAttr,
1686                         input_semantic_name,
1687                         input_semantic_index,
1688                         NULL,
1689                         /* outputs */
1690                         num_outputs,
1691                         outputMapping,
1692                         output_semantic_name,
1693                         output_semantic_index);
1694
1695    stp->state.tokens = ureg_get_tokens(ureg, NULL);
1696
1697    ureg_destroy(ureg);
1698
1699    st_translate_stream_output_info(prog);
1700
1701    st_store_ir_in_disk_cache(st, prog, false);
1702
1703    if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1704       _mesa_print_program(prog);
1705
1706    free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1707    stp->glsl_to_tgsi = NULL;
1708    return true;
1709 }
1710
1711
1712 /**
1713  * Get/create a basic program variant.
1714  */
1715 struct st_variant *
1716 st_get_common_variant(struct st_context *st,
1717                       struct st_program *prog,
1718                       const struct st_common_variant_key *key)
1719 {
1720    struct pipe_context *pipe = st->pipe;
1721    struct st_variant *v;
1722    struct pipe_shader_state state = {0};
1723    struct gl_program_parameter_list *params = prog->Base.Parameters;
1724
1725    /* Search for existing variant */
1726    for (v = prog->variants; v; v = v->next) {
1727       if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
1728          break;
1729    }
1730
1731    if (!v) {
1732       /* create new */
1733       v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
1734       if (v) {
1735          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1736             bool finalize = false;
1737
1738             state.type = PIPE_SHADER_IR_NIR;
1739             state.ir.nir = get_nir_shader(st, prog);
1740
1741             if (key->clamp_color) {
1742                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1743                finalize = true;
1744             }
1745
1746             if (key->lower_ucp) {
1747                lower_ucp(st, state.ir.nir, key->lower_ucp, params);
1748                finalize = true;
1749             }
1750
1751             state.stream_output = prog->state.stream_output;
1752
1753             if (finalize || !st->allow_st_finalize_nir_twice) {
1754                st_finalize_nir(st, &prog->Base, prog->shader_program,
1755                                state.ir.nir, true);
1756             }
1757
1758             if (ST_DEBUG & DEBUG_PRINT_IR)
1759                nir_print_shader(state.ir.nir, stderr);
1760          } else {
1761             if (key->lower_depth_clamp) {
1762                struct gl_program_parameter_list *params = prog->Base.Parameters;
1763
1764                unsigned depth_range_const =
1765                      _mesa_add_state_reference(params, depth_range_state);
1766
1767                const struct tgsi_token *tokens;
1768                tokens =
1769                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1770                                                depth_range_const,
1771                                                key->clip_negative_one_to_one);
1772
1773                if (tokens != prog->state.tokens)
1774                   tgsi_free_tokens(prog->state.tokens);
1775
1776                prog->state.tokens = tokens;
1777             }
1778             state = prog->state;
1779
1780             if (ST_DEBUG & DEBUG_PRINT_IR)
1781                tgsi_dump(state.tokens, 0);
1782          }
1783          /* fill in new variant */
1784          switch (prog->Base.info.stage) {
1785          case MESA_SHADER_TESS_CTRL:
1786             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1787             break;
1788          case MESA_SHADER_TESS_EVAL:
1789             v->driver_shader = pipe->create_tes_state(pipe, &state);
1790             break;
1791          case MESA_SHADER_GEOMETRY:
1792             v->driver_shader = pipe->create_gs_state(pipe, &state);
1793             break;
1794          case MESA_SHADER_COMPUTE: {
1795             struct pipe_compute_state cs = {0};
1796             cs.ir_type = state.type;
1797             cs.req_local_mem = prog->Base.info.cs.shared_size;
1798
1799             if (state.type == PIPE_SHADER_IR_NIR)
1800                cs.prog = state.ir.nir;
1801             else
1802                cs.prog = state.tokens;
1803
1804             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1805             break;
1806          }
1807          default:
1808             assert(!"unhandled shader type");
1809             free(v);
1810             return NULL;
1811          }
1812
1813          st_common_variant(v)->key = *key;
1814          v->st = key->st;
1815
1816          /* insert into list */
1817          v->next = prog->variants;
1818          prog->variants = v;
1819       }
1820    }
1821
1822    return v;
1823 }
1824
1825
1826 /**
1827  * Vert/Geom/Frag programs have per-context variants.  Free all the
1828  * variants attached to the given program which match the given context.
1829  */
1830 static void
1831 destroy_program_variants(struct st_context *st, struct gl_program *target)
1832 {
1833    if (!target || target == &_mesa_DummyProgram)
1834       return;
1835
1836    struct st_program *p = st_program(target);
1837    struct st_variant *v, **prevPtr = &p->variants;
1838    bool unbound = false;
1839
1840    for (v = p->variants; v; ) {
1841       struct st_variant *next = v->next;
1842       if (v->st == st) {
1843          if (!unbound) {
1844             st_unbind_program(st, p);
1845             unbound = true;
1846          }
1847
1848          /* unlink from list */
1849          *prevPtr = next;
1850          /* destroy this variant */
1851          delete_variant(st, v, target->Target);
1852       }
1853       else {
1854          prevPtr = &v->next;
1855       }
1856       v = next;
1857    }
1858 }
1859
1860
1861 /**
1862  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1863  * which match the given context.
1864  */
1865 static void
1866 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1867 {
1868    struct st_context *st = (struct st_context *) userData;
1869    struct gl_shader *shader = (struct gl_shader *) data;
1870
1871    switch (shader->Type) {
1872    case GL_SHADER_PROGRAM_MESA:
1873       {
1874          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1875          GLuint i;
1876
1877          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1878             if (shProg->_LinkedShaders[i])
1879                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1880          }
1881       }
1882       break;
1883    case GL_VERTEX_SHADER:
1884    case GL_FRAGMENT_SHADER:
1885    case GL_GEOMETRY_SHADER:
1886    case GL_TESS_CONTROL_SHADER:
1887    case GL_TESS_EVALUATION_SHADER:
1888    case GL_COMPUTE_SHADER:
1889       break;
1890    default:
1891       assert(0);
1892    }
1893 }
1894
1895
1896 /**
1897  * Callback for _mesa_HashWalk.  Free all the program variants which match
1898  * the given context.
1899  */
1900 static void
1901 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1902 {
1903    struct st_context *st = (struct st_context *) userData;
1904    struct gl_program *program = (struct gl_program *) data;
1905    destroy_program_variants(st, program);
1906 }
1907
1908
1909 /**
1910  * Walk over all shaders and programs to delete any variants which
1911  * belong to the given context.
1912  * This is called during context tear-down.
1913  */
1914 void
1915 st_destroy_program_variants(struct st_context *st)
1916 {
1917    /* If shaders can be shared with other contexts, the last context will
1918     * call DeleteProgram on all shaders, releasing everything.
1919     */
1920    if (st->has_shareable_shaders)
1921       return;
1922
1923    /* ARB vert/frag program */
1924    _mesa_HashWalk(st->ctx->Shared->Programs,
1925                   destroy_program_variants_cb, st);
1926
1927    /* GLSL vert/frag/geom shaders */
1928    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1929                   destroy_shader_program_variants_cb, st);
1930 }
1931
1932
1933 /**
1934  * Compile one shader variant.
1935  */
1936 static void
1937 st_precompile_shader_variant(struct st_context *st,
1938                              struct gl_program *prog)
1939 {
1940    switch (prog->Target) {
1941    case GL_VERTEX_PROGRAM_ARB: {
1942       struct st_program *p = (struct st_program *)prog;
1943       struct st_common_variant_key key;
1944
1945       memset(&key, 0, sizeof(key));
1946
1947       key.st = st->has_shareable_shaders ? NULL : st;
1948       st_get_vp_variant(st, p, &key);
1949       break;
1950    }
1951
1952    case GL_FRAGMENT_PROGRAM_ARB: {
1953       struct st_program *p = (struct st_program *)prog;
1954       struct st_fp_variant_key key;
1955
1956       memset(&key, 0, sizeof(key));
1957
1958       key.st = st->has_shareable_shaders ? NULL : st;
1959       st_get_fp_variant(st, p, &key);
1960       break;
1961    }
1962
1963    case GL_TESS_CONTROL_PROGRAM_NV:
1964    case GL_TESS_EVALUATION_PROGRAM_NV:
1965    case GL_GEOMETRY_PROGRAM_NV:
1966    case GL_COMPUTE_PROGRAM_NV: {
1967       struct st_program *p = st_program(prog);
1968       struct st_common_variant_key key;
1969
1970       memset(&key, 0, sizeof(key));
1971
1972       key.st = st->has_shareable_shaders ? NULL : st;
1973       st_get_common_variant(st, p, &key);
1974       break;
1975    }
1976
1977    default:
1978       assert(0);
1979    }
1980 }
1981
1982 void
1983 st_serialize_nir(struct st_program *stp)
1984 {
1985    if (!stp->serialized_nir) {
1986       struct blob blob;
1987       size_t size;
1988
1989       blob_init(&blob);
1990       nir_serialize(&blob, stp->Base.nir, false);
1991       blob_finish_get_buffer(&blob, &stp->serialized_nir, &size);
1992       stp->serialized_nir_size = size;
1993    }
1994 }
1995
1996 void
1997 st_finalize_program(struct st_context *st, struct gl_program *prog)
1998 {
1999    if (st->current_program[prog->info.stage] == prog) {
2000       if (prog->info.stage == MESA_SHADER_VERTEX)
2001          st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
2002       else
2003          st->dirty |= ((struct st_program *)prog)->affected_states;
2004    }
2005
2006    if (prog->nir) {
2007       nir_sweep(prog->nir);
2008
2009       /* This is only needed for ARB_vp/fp programs and when the disk cache
2010        * is disabled. If the disk cache is enabled, GLSL programs are
2011        * serialized in write_nir_to_cache.
2012        */
2013       st_serialize_nir(st_program(prog));
2014    }
2015
2016    /* Create Gallium shaders now instead of on demand. */
2017    if (ST_DEBUG & DEBUG_PRECOMPILE ||
2018        st->shader_has_one_variant[prog->info.stage])
2019       st_precompile_shader_variant(st, prog);
2020 }