src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35 #include "util/imports.h"
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44 #include "compiler/nir/nir_serialize.h"
  45 #include "draw/draw_context.h"
  46
  47 #include "pipe/p_context.h"
  48 #include "pipe/p_defines.h"
  49 #include "pipe/p_shader_tokens.h"
  50 #include "draw/draw_context.h"
  51 #include "tgsi/tgsi_dump.h"
  52 #include "tgsi/tgsi_emulate.h"
  53 #include "tgsi/tgsi_parse.h"
  54 #include "tgsi/tgsi_ureg.h"
  55
  56 #include "st_debug.h"
  57 #include "st_cb_bitmap.h"
  58 #include "st_cb_drawpixels.h"
  59 #include "st_context.h"
  60 #include "st_tgsi_lower_depth_clamp.h"
  61 #include "st_tgsi_lower_yuv.h"
  62 #include "st_program.h"
  63 #include "st_mesa_to_tgsi.h"
  64 #include "st_atifs_to_tgsi.h"
  65 #include "st_nir.h"
  66 #include "st_shader_cache.h"
  67 #include "st_util.h"
  68 #include "cso_cache/cso_context.h"
  69
  70
  71
  72 static void
  73 set_affected_state_flags(uint64_t *states,
  74                          struct gl_program *prog,
  75                          uint64_t new_constants,
  76                          uint64_t new_sampler_views,
  77                          uint64_t new_samplers,
  78                          uint64_t new_images,
  79                          uint64_t new_ubos,
  80                          uint64_t new_ssbos,
  81                          uint64_t new_atomics)
  82 {
  83    if (prog->Parameters->NumParameters)
  84       *states |= new_constants;
  85
  86    if (prog->info.num_textures)
  87       *states |= new_sampler_views | new_samplers;
  88
  89    if (prog->info.num_images)
  90       *states |= new_images;
  91
  92    if (prog->info.num_ubos)
  93       *states |= new_ubos;
  94
  95    if (prog->info.num_ssbos)
  96       *states |= new_ssbos;
  97
  98    if (prog->info.num_abos)
  99       *states |= new_atomics;
 100 }
 101
 102 /**
 103  * This determines which states will be updated when the shader is bound.
 104  */
 105 void
 106 st_set_prog_affected_state_flags(struct gl_program *prog)
 107 {
 108    uint64_t *states;
 109
 110    switch (prog->info.stage) {
 111    case MESA_SHADER_VERTEX:
 112       states = &((struct st_program*)prog)->affected_states;
 113
 114       *states = ST_NEW_VS_STATE |
 115                 ST_NEW_RASTERIZER |
 116                 ST_NEW_VERTEX_ARRAYS;
 117
 118       set_affected_state_flags(states, prog,
 119                                ST_NEW_VS_CONSTANTS,
 120                                ST_NEW_VS_SAMPLER_VIEWS,
 121                                ST_NEW_VS_SAMPLERS,
 122                                ST_NEW_VS_IMAGES,
 123                                ST_NEW_VS_UBOS,
 124                                ST_NEW_VS_SSBOS,
 125                                ST_NEW_VS_ATOMICS);
 126       break;
 127
 128    case MESA_SHADER_TESS_CTRL:
 129       states = &(st_program(prog))->affected_states;
 130
 131       *states = ST_NEW_TCS_STATE;
 132
 133       set_affected_state_flags(states, prog,
 134                                ST_NEW_TCS_CONSTANTS,
 135                                ST_NEW_TCS_SAMPLER_VIEWS,
 136                                ST_NEW_TCS_SAMPLERS,
 137                                ST_NEW_TCS_IMAGES,
 138                                ST_NEW_TCS_UBOS,
 139                                ST_NEW_TCS_SSBOS,
 140                                ST_NEW_TCS_ATOMICS);
 141       break;
 142
 143    case MESA_SHADER_TESS_EVAL:
 144       states = &(st_program(prog))->affected_states;
 145
 146       *states = ST_NEW_TES_STATE |
 147                 ST_NEW_RASTERIZER;
 148
 149       set_affected_state_flags(states, prog,
 150                                ST_NEW_TES_CONSTANTS,
 151                                ST_NEW_TES_SAMPLER_VIEWS,
 152                                ST_NEW_TES_SAMPLERS,
 153                                ST_NEW_TES_IMAGES,
 154                                ST_NEW_TES_UBOS,
 155                                ST_NEW_TES_SSBOS,
 156                                ST_NEW_TES_ATOMICS);
 157       break;
 158
 159    case MESA_SHADER_GEOMETRY:
 160       states = &(st_program(prog))->affected_states;
 161
 162       *states = ST_NEW_GS_STATE |
 163                 ST_NEW_RASTERIZER;
 164
 165       set_affected_state_flags(states, prog,
 166                                ST_NEW_GS_CONSTANTS,
 167                                ST_NEW_GS_SAMPLER_VIEWS,
 168                                ST_NEW_GS_SAMPLERS,
 169                                ST_NEW_GS_IMAGES,
 170                                ST_NEW_GS_UBOS,
 171                                ST_NEW_GS_SSBOS,
 172                                ST_NEW_GS_ATOMICS);
 173       break;
 174
 175    case MESA_SHADER_FRAGMENT:
 176       states = &((struct st_program*)prog)->affected_states;
 177
 178       /* gl_FragCoord and glDrawPixels always use constants. */
 179       *states = ST_NEW_FS_STATE |
 180                 ST_NEW_SAMPLE_SHADING |
 181                 ST_NEW_FS_CONSTANTS;
 182
 183       set_affected_state_flags(states, prog,
 184                                ST_NEW_FS_CONSTANTS,
 185                                ST_NEW_FS_SAMPLER_VIEWS,
 186                                ST_NEW_FS_SAMPLERS,
 187                                ST_NEW_FS_IMAGES,
 188                                ST_NEW_FS_UBOS,
 189                                ST_NEW_FS_SSBOS,
 190                                ST_NEW_FS_ATOMICS);
 191       break;
 192
 193    case MESA_SHADER_COMPUTE:
 194       states = &((struct st_program*)prog)->affected_states;
 195
 196       *states = ST_NEW_CS_STATE;
 197
 198       set_affected_state_flags(states, prog,
 199                                ST_NEW_CS_CONSTANTS,
 200                                ST_NEW_CS_SAMPLER_VIEWS,
 201                                ST_NEW_CS_SAMPLERS,
 202                                ST_NEW_CS_IMAGES,
 203                                ST_NEW_CS_UBOS,
 204                                ST_NEW_CS_SSBOS,
 205                                ST_NEW_CS_ATOMICS);
 206       break;
 207
 208    default:
 209       unreachable("unhandled shader stage");
 210    }
 211 }
 212
 213
 214 /**
 215  * Delete a shader variant.  Note the caller must unlink the variant from
 216  * the linked list.
 217  */
 218 static void
 219 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
 220 {
 221    if (v->driver_shader) {
 222       if (target == GL_VERTEX_PROGRAM_ARB &&
 223           ((struct st_common_variant*)v)->key.is_draw_shader) {
 224          /* Draw shader. */
 225          draw_delete_vertex_shader(st->draw, v->driver_shader);
 226       } else if (st->has_shareable_shaders || v->st == st) {
 227          /* The shader's context matches the calling context, or we
 228           * don't care.
 229           */
 230          switch (target) {
 231          case GL_VERTEX_PROGRAM_ARB:
 232             st->pipe->delete_vs_state(st->pipe, v->driver_shader);
 233             break;
 234          case GL_TESS_CONTROL_PROGRAM_NV:
 235             st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
 236             break;
 237          case GL_TESS_EVALUATION_PROGRAM_NV:
 238             st->pipe->delete_tes_state(st->pipe, v->driver_shader);
 239             break;
 240          case GL_GEOMETRY_PROGRAM_NV:
 241             st->pipe->delete_gs_state(st->pipe, v->driver_shader);
 242             break;
 243          case GL_FRAGMENT_PROGRAM_ARB:
 244             st->pipe->delete_fs_state(st->pipe, v->driver_shader);
 245             break;
 246          case GL_COMPUTE_PROGRAM_NV:
 247             st->pipe->delete_compute_state(st->pipe, v->driver_shader);
 248             break;
 249          default:
 250             unreachable("bad shader type in delete_basic_variant");
 251          }
 252       } else {
 253          /* We can't delete a shader with a context different from the one
 254           * that created it.  Add it to the creating context's zombie list.
 255           */
 256          enum pipe_shader_type type =
 257             pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
 258
 259          st_save_zombie_shader(v->st, type, v->driver_shader);
 260       }
 261    }
 262
 263    free(v);
 264 }
 265
 266 static void
 267 st_unbind_program(struct st_context *st, struct st_program *p)
 268 {
 269    /* Unbind the shader in cso_context and re-bind in st/mesa. */
 270    switch (p->Base.info.stage) {
 271    case MESA_SHADER_VERTEX:
 272       cso_set_vertex_shader_handle(st->cso_context, NULL);
 273       st->dirty |= ST_NEW_VS_STATE;
 274       break;
 275    case MESA_SHADER_TESS_CTRL:
 276       cso_set_tessctrl_shader_handle(st->cso_context, NULL);
 277       st->dirty |= ST_NEW_TCS_STATE;
 278       break;
 279    case MESA_SHADER_TESS_EVAL:
 280       cso_set_tesseval_shader_handle(st->cso_context, NULL);
 281       st->dirty |= ST_NEW_TES_STATE;
 282       break;
 283    case MESA_SHADER_GEOMETRY:
 284       cso_set_geometry_shader_handle(st->cso_context, NULL);
 285       st->dirty |= ST_NEW_GS_STATE;
 286       break;
 287    case MESA_SHADER_FRAGMENT:
 288       cso_set_fragment_shader_handle(st->cso_context, NULL);
 289       st->dirty |= ST_NEW_FS_STATE;
 290       break;
 291    case MESA_SHADER_COMPUTE:
 292       cso_set_compute_shader_handle(st->cso_context, NULL);
 293       st->dirty |= ST_NEW_CS_STATE;
 294       break;
 295    default:
 296       unreachable("invalid shader type");
 297    }
 298 }
 299
 300 /**
 301  * Free all basic program variants.
 302  */
 303 void
 304 st_release_variants(struct st_context *st, struct st_program *p)
 305 {
 306    struct st_variant *v;
 307
 308    /* If we are releasing shaders, re-bind them, because we don't
 309     * know which shaders are bound in the driver.
 310     */
 311    if (p->variants)
 312       st_unbind_program(st, p);
 313
 314    for (v = p->variants; v; ) {
 315       struct st_variant *next = v->next;
 316       delete_variant(st, v, p->Base.Target);
 317       v = next;
 318    }
 319
 320    p->variants = NULL;
 321
 322    if (p->state.tokens) {
 323       ureg_free_tokens(p->state.tokens);
 324       p->state.tokens = NULL;
 325    }
 326
 327    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 328     * it has resulted in the driver taking ownership of the NIR.  Those
 329     * callers should be NULLing out the nir field in any pipe_shader_state
 330     * that might have this called in order to indicate that.
 331     *
 332     * GLSL IR and ARB programs will have set gl_program->nir to the same
 333     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 334     */
 335 }
 336
 337 /**
 338  * Free all basic program variants and unref program.
 339  */
 340 void
 341 st_release_program(struct st_context *st, struct st_program **p)
 342 {
 343    if (!*p)
 344       return;
 345
 346    st_release_variants(st, *p);
 347    st_reference_prog(st, p, NULL);
 348 }
 349
 350 void
 351 st_finalize_nir_before_variants(struct nir_shader *nir)
 352 {
 353    NIR_PASS_V(nir, nir_opt_access);
 354
 355    NIR_PASS_V(nir, nir_split_var_copies);
 356    NIR_PASS_V(nir, nir_lower_var_copies);
 357    if (nir->options->lower_all_io_to_temps ||
 358        nir->options->lower_all_io_to_elements ||
 359        nir->info.stage == MESA_SHADER_VERTEX ||
 360        nir->info.stage == MESA_SHADER_GEOMETRY) {
 361       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 362    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 363       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 364    }
 365
 366    st_nir_assign_vs_in_locations(nir);
 367 }
 368
 369 /**
 370  * Translate ARB (asm) program to NIR
 371  */
 372 static nir_shader *
 373 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 374                          gl_shader_stage stage)
 375 {
 376    struct pipe_screen *screen = st->pipe->screen;
 377    const struct gl_shader_compiler_options *options =
 378       &st->ctx->Const.ShaderCompilerOptions[stage];
 379
 380    /* Translate to NIR */
 381    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 382    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 383    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 384
 385    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
 386    NIR_PASS_V(nir, nir_lower_system_values);
 387
 388    /* Optimise NIR */
 389    NIR_PASS_V(nir, nir_opt_constant_folding);
 390    st_nir_opts(nir);
 391    st_finalize_nir_before_variants(nir);
 392
 393    if (st->allow_st_finalize_nir_twice)
 394       st_finalize_nir(st, prog, NULL, nir, true);
 395
 396    nir_validate_shader(nir, "after st/glsl finalize_nir");
 397
 398    return nir;
 399 }
 400
 401 void
 402 st_prepare_vertex_program(struct st_program *stp)
 403 {
 404    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 405
 406    stvp->num_inputs = 0;
 407    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 408    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 409
 410    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 411     * and TGSI generic input indexes, plus input attrib semantic info.
 412     */
 413    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 414       if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 415          stvp->input_to_index[attr] = stvp->num_inputs;
 416          stvp->index_to_input[stvp->num_inputs] = attr;
 417          stvp->num_inputs++;
 418
 419          if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 420             /* add placeholder for second part of a double attribute */
 421             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 422             stvp->num_inputs++;
 423          }
 424       }
 425    }
 426    /* pre-setup potentially unused edgeflag input */
 427    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 428    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 429
 430    /* Compute mapping of vertex program outputs to slots. */
 431    unsigned num_outputs = 0;
 432    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 433       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 434          stvp->result_to_output[attr] = num_outputs++;
 435    }
 436    /* pre-setup potentially unused edgeflag output */
 437    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 438 }
 439
 440 void
 441 st_translate_stream_output_info(struct gl_program *prog)
 442 {
 443    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 444    if (!info)
 445       return;
 446
 447    /* Determine the (default) output register mapping for each output. */
 448    unsigned num_outputs = 0;
 449    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 450    memset(output_mapping, 0, sizeof(output_mapping));
 451
 452    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 453       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 454          output_mapping[attr] = num_outputs++;
 455    }
 456
 457    /* Translate stream output info. */
 458    struct pipe_stream_output_info *so_info =
 459       &((struct st_program*)prog)->state.stream_output;
 460
 461    for (unsigned i = 0; i < info->NumOutputs; i++) {
 462       so_info->output[i].register_index =
 463          output_mapping[info->Outputs[i].OutputRegister];
 464       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 465       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 466       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 467       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 468       so_info->output[i].stream = info->Outputs[i].StreamId;
 469    }
 470
 471    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 472       so_info->stride[i] = info->Buffers[i].Stride;
 473    }
 474    so_info->num_outputs = info->NumOutputs;
 475 }
 476
 477 /**
 478  * Translate a vertex program.
 479  */
 480 bool
 481 st_translate_vertex_program(struct st_context *st,
 482                             struct st_program *stp)
 483 {
 484    struct ureg_program *ureg;
 485    enum pipe_error error;
 486    unsigned num_outputs = 0;
 487    unsigned attr;
 488    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 489    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 490
 491    if (stp->Base.arb.IsPositionInvariant)
 492       _mesa_insert_mvp_code(st->ctx, &stp->Base);
 493
 494    st_prepare_vertex_program(stp);
 495
 496    /* ARB_vp: */
 497    if (!stp->glsl_to_tgsi) {
 498       _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);
 499
 500       /* This determines which states will be updated when the assembly
 501        * shader is bound.
 502        */
 503       stp->affected_states = ST_NEW_VS_STATE |
 504                               ST_NEW_RASTERIZER |
 505                               ST_NEW_VERTEX_ARRAYS;
 506
 507       if (stp->Base.Parameters->NumParameters)
 508          stp->affected_states |= ST_NEW_VS_CONSTANTS;
 509
 510       /* Translate to NIR if preferred. */
 511       if (st->pipe->screen->get_shader_param(st->pipe->screen,
 512                                              PIPE_SHADER_VERTEX,
 513                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 514          assert(!stp->glsl_to_tgsi);
 515
 516          if (stp->Base.nir)
 517             ralloc_free(stp->Base.nir);
 518
 519          if (stp->serialized_nir) {
 520             free(stp->serialized_nir);
 521             stp->serialized_nir = NULL;
 522          }
 523
 524          stp->state.type = PIPE_SHADER_IR_NIR;
 525          stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
 526                                                   MESA_SHADER_VERTEX);
 527          /* For st_draw_feedback, we need to generate TGSI too if draw doesn't
 528           * use LLVM.
 529           */
 530          if (draw_has_llvm())
 531             return true;
 532       }
 533    }
 534
 535    /* Get semantic names and indices. */
 536    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 537       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 538          unsigned slot = num_outputs++;
 539          unsigned semantic_name, semantic_index;
 540          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 541                                       &semantic_name, &semantic_index);
 542          output_semantic_name[slot] = semantic_name;
 543          output_semantic_index[slot] = semantic_index;
 544       }
 545    }
 546    /* pre-setup potentially unused edgeflag output */
 547    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 548    output_semantic_index[num_outputs] = 0;
 549
 550    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 551    if (ureg == NULL)
 552       return false;
 553
 554    if (stp->Base.info.clip_distance_array_size)
 555       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 556                     stp->Base.info.clip_distance_array_size);
 557    if (stp->Base.info.cull_distance_array_size)
 558       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 559                     stp->Base.info.cull_distance_array_size);
 560
 561    if (ST_DEBUG & DEBUG_MESA) {
 562       _mesa_print_program(&stp->Base);
 563       _mesa_print_program_parameters(st->ctx, &stp->Base);
 564       debug_printf("\n");
 565    }
 566
 567    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 568
 569    if (stp->glsl_to_tgsi) {
 570       error = st_translate_program(st->ctx,
 571                                    PIPE_SHADER_VERTEX,
 572                                    ureg,
 573                                    stp->glsl_to_tgsi,
 574                                    &stp->Base,
 575                                    /* inputs */
 576                                    stvp->num_inputs,
 577                                    stvp->input_to_index,
 578                                    NULL, /* inputSlotToAttr */
 579                                    NULL, /* input semantic name */
 580                                    NULL, /* input semantic index */
 581                                    NULL, /* interp mode */
 582                                    /* outputs */
 583                                    num_outputs,
 584                                    stvp->result_to_output,
 585                                    output_semantic_name,
 586                                    output_semantic_index);
 587
 588       st_translate_stream_output_info(&stp->Base);
 589
 590       free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
 591    } else
 592       error = st_translate_mesa_program(st->ctx,
 593                                         PIPE_SHADER_VERTEX,
 594                                         ureg,
 595                                         &stp->Base,
 596                                         /* inputs */
 597                                         stvp->num_inputs,
 598                                         stvp->input_to_index,
 599                                         NULL, /* input semantic name */
 600                                         NULL, /* input semantic index */
 601                                         NULL,
 602                                         /* outputs */
 603                                         num_outputs,
 604                                         stvp->result_to_output,
 605                                         output_semantic_name,
 606                                         output_semantic_index);
 607
 608    if (error) {
 609       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 610       _mesa_print_program(&stp->Base);
 611       debug_assert(0);
 612       return false;
 613    }
 614
 615    stp->state.tokens = ureg_get_tokens(ureg, NULL);
 616    ureg_destroy(ureg);
 617
 618    if (stp->glsl_to_tgsi) {
 619       stp->glsl_to_tgsi = NULL;
 620       st_store_ir_in_disk_cache(st, &stp->Base, false);
 621    }
 622
 623    return stp->state.tokens != NULL;
 624 }
 625
 626 static struct nir_shader *
 627 get_nir_shader(struct st_context *st, struct st_program *stp)
 628 {
 629    if (stp->Base.nir) {
 630       nir_shader *nir = stp->Base.nir;
 631
 632       /* The first shader variant takes ownership of NIR, so that there is
 633        * no cloning. Additional shader variants are always generated from
 634        * serialized NIR to save memory.
 635        */
 636       stp->Base.nir = NULL;
 637       assert(stp->serialized_nir && stp->serialized_nir_size);
 638       return nir;
 639    }
 640
 641    struct blob_reader blob_reader;
 642    const struct nir_shader_compiler_options *options =
 643       st->ctx->Const.ShaderCompilerOptions[stp->Base.info.stage].NirOptions;
 644
 645    blob_reader_init(&blob_reader, stp->serialized_nir, stp->serialized_nir_size);
 646    return nir_deserialize(NULL, options, &blob_reader);
 647 }
 648
 649 static const gl_state_index16 depth_range_state[STATE_LENGTH] =
 650    { STATE_DEPTH_RANGE };
 651
 652 static struct st_common_variant *
 653 st_create_vp_variant(struct st_context *st,
 654                      struct st_program *stvp,
 655                      const struct st_common_variant_key *key)
 656 {
 657    struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
 658    struct pipe_context *pipe = st->pipe;
 659    struct pipe_screen *screen = pipe->screen;
 660    struct pipe_shader_state state = {0};
 661
 662    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 663       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 664    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 665
 666    vpv->key = *key;
 667
 668    state.stream_output = stvp->state.stream_output;
 669
 670    if (stvp->state.type == PIPE_SHADER_IR_NIR &&
 671        (!key->is_draw_shader || draw_has_llvm())) {
 672       bool finalize = false;
 673
 674       state.type = PIPE_SHADER_IR_NIR;
 675       state.ir.nir = get_nir_shader(st, stvp);
 676       if (key->clamp_color) {
 677          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 678          finalize = true;
 679       }
 680       if (key->passthrough_edgeflags) {
 681          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 682          finalize = true;
 683       }
 684
 685       if (key->lower_point_size) {
 686          _mesa_add_state_reference(params, point_size_state);
 687          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 688                     point_size_state);
 689          finalize = true;
 690       }
 691
 692       if (key->lower_ucp) {
 693          bool can_compact = screen->get_param(screen,
 694                                               PIPE_CAP_NIR_COMPACT_ARRAYS);
 695
 696          bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 697          gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 698          for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 699             if (use_eye) {
 700                clipplane_state[i][0] = STATE_CLIPPLANE;
 701                clipplane_state[i][1] = i;
 702             } else {
 703                clipplane_state[i][0] = STATE_INTERNAL;
 704                clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 705                clipplane_state[i][2] = i;
 706             }
 707             _mesa_add_state_reference(params, clipplane_state[i]);
 708          }
 709
 710          NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
 711                     true, can_compact, clipplane_state);
 712          NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
 713                     nir_shader_get_entrypoint(state.ir.nir), true, false);
 714          NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
 715          finalize = true;
 716       }
 717
 718       if (finalize || !st->allow_st_finalize_nir_twice) {
 719          st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
 720                          true);
 721
 722          /* Some of the lowering above may have introduced new varyings */
 723          nir_shader_gather_info(state.ir.nir,
 724                                 nir_shader_get_entrypoint(state.ir.nir));
 725       }
 726
 727       if (ST_DEBUG & DEBUG_PRINT_IR)
 728          nir_print_shader(state.ir.nir, stderr);
 729
 730       if (key->is_draw_shader)
 731          vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 732       else
 733          vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 734
 735       return vpv;
 736    }
 737
 738    state.type = PIPE_SHADER_IR_TGSI;
 739    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 740
 741    /* Emulate features. */
 742    if (key->clamp_color || key->passthrough_edgeflags) {
 743       const struct tgsi_token *tokens;
 744       unsigned flags =
 745          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 746          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 747
 748       tokens = tgsi_emulate(state.tokens, flags);
 749
 750       if (tokens) {
 751          tgsi_free_tokens(state.tokens);
 752          state.tokens = tokens;
 753       } else {
 754          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 755       }
 756    }
 757
 758    if (key->lower_depth_clamp) {
 759       unsigned depth_range_const =
 760             _mesa_add_state_reference(params, depth_range_state);
 761
 762       const struct tgsi_token *tokens;
 763       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 764                                          key->clip_negative_one_to_one);
 765       if (tokens != state.tokens)
 766          tgsi_free_tokens(state.tokens);
 767       state.tokens = tokens;
 768    }
 769
 770    if (ST_DEBUG & DEBUG_PRINT_IR)
 771       tgsi_dump(state.tokens, 0);
 772
 773    if (key->is_draw_shader)
 774       vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 775    else
 776       vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 777
 778    if (state.tokens) {
 779       tgsi_free_tokens(state.tokens);
 780    }
 781
 782    return vpv;
 783 }
 784
 785
 786 /**
 787  * Find/create a vertex program variant.
 788  */
 789 struct st_common_variant *
 790 st_get_vp_variant(struct st_context *st,
 791                   struct st_program *stp,
 792                   const struct st_common_variant_key *key)
 793 {
 794    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 795    struct st_common_variant *vpv;
 796
 797    /* Search for existing variant */
 798    for (vpv = st_common_variant(stp->variants); vpv;
 799         vpv = st_common_variant(vpv->base.next)) {
 800       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 801          break;
 802       }
 803    }
 804
 805    if (!vpv) {
 806       /* create now */
 807       vpv = st_create_vp_variant(st, stp, key);
 808       if (vpv) {
 809          vpv->base.st = key->st;
 810
 811          unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
 812          for (unsigned index = 0; index < num_inputs; ++index) {
 813             unsigned attr = stvp->index_to_input[index];
 814             if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 815                continue;
 816             vpv->vert_attrib_mask |= 1u << attr;
 817          }
 818
 819          /* insert into list */
 820          vpv->base.next = stp->variants;
 821          stp->variants = &vpv->base;
 822       }
 823    }
 824
 825    return vpv;
 826 }
 827
 828
 829 /**
 830  * Translate a Mesa fragment shader into a TGSI shader.
      *
      * For non-GLSL programs (stfp->glsl_to_tgsi is NULL) this first sets
      * stfp->affected_states.  If the program is also not an ATI_fs program
      * and the screen reports a non-zero PIPE_SHADER_CAP_PREFERRED_IR for the
      * fragment stage, the program is translated to NIR instead, stored in
      * stfp->Base.nir, and the function returns true without producing TGSI.
      *
      * Otherwise the input and output semantic tables are built from
      * info.inputs_read / info.outputs_written, a ureg program is created,
      * one of the three translators (GLSL, ATI_fs, Mesa IR) is run, and the
      * resulting tokens are stored in stfp->state.tokens.
      *
      * Returns false if the ureg program cannot be created or no tokens were
      * produced; true otherwise.
 831  */
 832 bool
 833 st_translate_fragment_program(struct st_context *st,
 834                               struct st_program *stfp)
 835 {
 836    /* Non-GLSL programs: */
 837    if (!stfp->glsl_to_tgsi) {
 838       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 839       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 840          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 841
 842       /* This determines which states will be updated when the assembly
 843        * shader is bound.
 844        *
 845        * fragment.position and glDrawPixels always use constants.
 846        */
 847       stfp->affected_states = ST_NEW_FS_STATE |
 848                               ST_NEW_SAMPLE_SHADING |
 849                               ST_NEW_FS_CONSTANTS;
 850
 851       if (stfp->ati_fs) {
 852          /* Just set them for ATI_fs unconditionally. */
 853          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 854                                   ST_NEW_FS_SAMPLERS;
 855       } else {
 856          /* ARB_fp */
 857          if (stfp->Base.SamplersUsed)
 858             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 859                                      ST_NEW_FS_SAMPLERS;
 860       }
 861
 862       /* Translate to NIR. */
 863       if (!stfp->ati_fs &&
 864           st->pipe->screen->get_shader_param(st->pipe->screen,
 865                                              PIPE_SHADER_FRAGMENT,
 866                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 867          nir_shader *nir =
 868             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 869
              /* Drop any NIR (and its serialized form) left over from an
               * earlier translation before installing the new shader.
               */
 870          if (stfp->Base.nir)
 871             ralloc_free(stfp->Base.nir);
 872          if (stfp->serialized_nir) {
 873             free(stfp->serialized_nir);
 874             stfp->serialized_nir = NULL;
 875          }
 876          stfp->state.type = PIPE_SHADER_IR_NIR;
 877          stfp->Base.nir = nir;
 878          return true;
 879       }
 880    }
 881
        /* From here on the program is translated to TGSI (GLSL via
         * glsl_to_tgsi, ATI_fs, or a Mesa IR program that did not take the
         * NIR path above).
         */
 882    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 883    ubyte inputMapping[VARYING_SLOT_MAX];
 884    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 885    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 886    GLuint attr;
 887    GLbitfield64 inputsRead;
 888    struct ureg_program *ureg;
 889
 890    GLboolean write_all = GL_FALSE;
 891
 892    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 893    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 894    uint fs_num_inputs = 0;
 895
 896    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 897    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 898    uint fs_num_outputs = 0;
 899
        /* Fill every slot with 0xff; only the slots assigned in the loop
         * below receive a real attribute number.
         */
 900    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 901
 902    /*
 903     * Convert Mesa program inputs to TGSI input register semantics.
         *
         * Each attribute set in inputs_read gets the next free slot; the
         * switch below picks the semantic name/index and interpolation mode
         * recorded for that slot.
 904     */
 905    inputsRead = stfp->Base.info.inputs_read;
 906    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 907       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 908          const GLuint slot = fs_num_inputs++;
 909
 910          inputMapping[attr] = slot;
 911          inputSlotToAttr[slot] = attr;
 912
 913          switch (attr) {
 914          case VARYING_SLOT_POS:
 915             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 916             input_semantic_index[slot] = 0;
 917             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 918             break;
 919          case VARYING_SLOT_COL0:
 920             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 921             input_semantic_index[slot] = 0;
 922             interpMode[slot] = stfp->glsl_to_tgsi ?
 923                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 924             break;
 925          case VARYING_SLOT_COL1:
 926             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 927             input_semantic_index[slot] = 1;
 928             interpMode[slot] = stfp->glsl_to_tgsi ?
 929                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 930             break;
 931          case VARYING_SLOT_FOGC:
 932             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 933             input_semantic_index[slot] = 0;
 934             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 935             break;
 936          case VARYING_SLOT_FACE:
 937             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 938             input_semantic_index[slot] = 0;
 939             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 940             break;
 941          case VARYING_SLOT_PRIMITIVE_ID:
 942             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 943             input_semantic_index[slot] = 0;
 944             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 945             break;
 946          case VARYING_SLOT_LAYER:
 947             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 948             input_semantic_index[slot] = 0;
 949             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 950             break;
 951          case VARYING_SLOT_VIEWPORT:
 952             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 953             input_semantic_index[slot] = 0;
 954             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 955             break;
 956          case VARYING_SLOT_CLIP_DIST0:
 957             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 958             input_semantic_index[slot] = 0;
 959             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 960             break;
 961          case VARYING_SLOT_CLIP_DIST1:
 962             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
 963             input_semantic_index[slot] = 1;
 964             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 965             break;
 966          case VARYING_SLOT_CULL_DIST0:
 967          case VARYING_SLOT_CULL_DIST1:
 968             /* these should have been lowered by GLSL */
 969             assert(0);
 970             break;
 971             /* In most cases, there is nothing special about these
 972              * inputs, so adopt a convention to use the generic
 973              * semantic name and the mesa VARYING_SLOT_ number as the
 974              * index.
 975              *
 976              * All that is required is that the vertex shader labels
 977              * its own outputs similarly, and that the vertex shader
 978              * generates at least every output required by the
 979              * fragment shader plus fixed-function hardware (such as
 980              * BFC).
 981              *
 982              * However, some drivers may need us to identify the PNTC and TEXi
 983              * varyings if, for example, their capability to replace them with
 984              * sprite coordinates is limited.
 985              */
 986          case VARYING_SLOT_PNTC:
 987             if (st->needs_texcoord_semantic) {
 988                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
 989                input_semantic_index[slot] = 0;
 990                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 991                break;
 992             }
 993             /* fall through */
 994          case VARYING_SLOT_TEX0:
 995          case VARYING_SLOT_TEX1:
 996          case VARYING_SLOT_TEX2:
 997          case VARYING_SLOT_TEX3:
 998          case VARYING_SLOT_TEX4:
 999          case VARYING_SLOT_TEX5:
1000          case VARYING_SLOT_TEX6:
1001          case VARYING_SLOT_TEX7:
1002             if (st->needs_texcoord_semantic) {
1003                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
1004                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
1005                interpMode[slot] = stfp->glsl_to_tgsi ?
1006                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1007                break;
1008             }
1009             /* fall through */
1010          case VARYING_SLOT_VAR0:
1011          default:
1012             /* Semantic indices should be zero-based because drivers may choose
1013              * to assign a fixed slot determined by that index.
1014              * This is useful because ARB_separate_shader_objects uses location
1015              * qualifiers for linkage, and if the semantic index corresponds to
1016              * these locations, linkage passes in the driver become unnecessary.
1017              *
1018              * If needs_texcoord_semantic is true, no semantic indices will be
1019              * consumed for the TEXi varyings, and we can base the locations of
1020              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
1021              */
1022             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
1023                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
1024             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
1025             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
1026             if (attr == VARYING_SLOT_PNTC)
1027                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1028             else {
1029                interpMode[slot] = stfp->glsl_to_tgsi ?
1030                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1031             }
1032             break;
1033          }
1034       }
1035       else {
1036          inputMapping[attr] = -1;
1037       }
1038    }
1039
1040    /*
1041     * Semantics and mapping for outputs
         *
         * Depth, stencil and sample mask are emitted first, in that order,
         * and their bits are cleared from outputsWritten so that the colour
         * loop further down does not see them again.
         *
         * NOTE(review): the clears use ~(1 << bit) on a GLbitfield64 while
         * the tests use BITFIELD64_BIT(bit); the two forms should probably
         * be made consistent.
1042     */
1043    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
1044
1045    /* if z is written, emit that first */
1046    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
1047       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
1048       fs_output_semantic_index[fs_num_outputs] = 0;
1049       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
1050       fs_num_outputs++;
1051       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
1052    }
1053
1054    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
1055       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
1056       fs_output_semantic_index[fs_num_outputs] = 0;
1057       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
1058       fs_num_outputs++;
1059       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
1060    }
1061
1062    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
1063       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
1064       fs_output_semantic_index[fs_num_outputs] = 0;
1065       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
1066       fs_num_outputs++;
1067       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
1068    }
1069
1070    /* handle remaining outputs (color)
         *
         * outputMapping has 2 * FRAG_RESULT_MAX entries: indices below
         * FRAG_RESULT_MAX are tested against outputsWritten, the rest against
         * SecondaryOutputsWritten (secondary colour for dual source blending).
         */
1071    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
1072       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
1073          stfp->Base.SecondaryOutputsWritten;
1074       const unsigned loc = attr % FRAG_RESULT_MAX;
1075
1076       if (written & BITFIELD64_BIT(loc)) {
1077          switch (loc) {
1078          case FRAG_RESULT_DEPTH:
1079          case FRAG_RESULT_STENCIL:
1080          case FRAG_RESULT_SAMPLE_MASK:
1081             /* handled above */
1082             assert(0);
1083             break;
1084          case FRAG_RESULT_COLOR:
1085             write_all = GL_TRUE; /* fallthrough */
1086          default: {
1087             int index;
1088             assert(loc == FRAG_RESULT_COLOR ||
1089                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1090
1091             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1092
1093             if (attr >= FRAG_RESULT_MAX) {
1094                /* Secondary color for dual source blending. */
1095                assert(index == 0);
1096                index++;
1097             }
1098
1099             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1100             fs_output_semantic_index[fs_num_outputs] = index;
1101             outputMapping[attr] = fs_num_outputs;
1102             break;
1103          }
1104          }
1105
1106          fs_num_outputs++;
1107       }
1108    }
1109
1110    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1111    if (ureg == NULL)
1112       return false;
1113
1114    if (ST_DEBUG & DEBUG_MESA) {
1115       _mesa_print_program(&stfp->Base);
1116       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1117       debug_printf("\n");
1118    }
        /* write_all was set above when FRAG_RESULT_COLOR is written. */
1119    if (write_all == GL_TRUE)
1120       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1121
        /* Forward the program's depth layout, if any, as a TGSI property. */
1122    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1123       switch (stfp->Base.info.fs.depth_layout) {
1124       case FRAG_DEPTH_LAYOUT_ANY:
1125          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1126                        TGSI_FS_DEPTH_LAYOUT_ANY);
1127          break;
1128       case FRAG_DEPTH_LAYOUT_GREATER:
1129          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1130                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1131          break;
1132       case FRAG_DEPTH_LAYOUT_LESS:
1133          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1134                        TGSI_FS_DEPTH_LAYOUT_LESS);
1135          break;
1136       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1137          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1138                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1139          break;
1140       default:
1141          assert(0);
1142       }
1143    }
1144
        /* Run the translator that matches the program's origin.  All three
         * receive the same input/output tables built above; only the GLSL
         * path also takes inputSlotToAttr.
         */
1145    if (stfp->glsl_to_tgsi) {
1146       st_translate_program(st->ctx,
1147                            PIPE_SHADER_FRAGMENT,
1148                            ureg,
1149                            stfp->glsl_to_tgsi,
1150                            &stfp->Base,
1151                            /* inputs */
1152                            fs_num_inputs,
1153                            inputMapping,
1154                            inputSlotToAttr,
1155                            input_semantic_name,
1156                            input_semantic_index,
1157                            interpMode,
1158                            /* outputs */
1159                            fs_num_outputs,
1160                            outputMapping,
1161                            fs_output_semantic_name,
1162                            fs_output_semantic_index);
1163
1164       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1165    } else if (stfp->ati_fs)
1166       st_translate_atifs_program(ureg,
1167                                  stfp->ati_fs,
1168                                  &stfp->Base,
1169                                  /* inputs */
1170                                  fs_num_inputs,
1171                                  inputMapping,
1172                                  input_semantic_name,
1173                                  input_semantic_index,
1174                                  interpMode,
1175                                  /* outputs */
1176                                  fs_num_outputs,
1177                                  outputMapping,
1178                                  fs_output_semantic_name,
1179                                  fs_output_semantic_index);
1180    else
1181       st_translate_mesa_program(st->ctx,
1182                                 PIPE_SHADER_FRAGMENT,
1183                                 ureg,
1184                                 &stfp->Base,
1185                                 /* inputs */
1186                                 fs_num_inputs,
1187                                 inputMapping,
1188                                 input_semantic_name,
1189                                 input_semantic_index,
1190                                 interpMode,
1191                                 /* outputs */
1192                                 fs_num_outputs,
1193                                 outputMapping,
1194                                 fs_output_semantic_name,
1195                                 fs_output_semantic_index);
1196
1197    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1198    ureg_destroy(ureg);
1199
        /* The visitor was freed right after translation above; clear the
         * now-stale pointer before storing the IR in the disk cache.
         */
1200    if (stfp->glsl_to_tgsi) {
1201       stfp->glsl_to_tgsi = NULL;
1202       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1203    }
1204
1205    return stfp->state.tokens != NULL;
1206 }
1207
     /**
      * Create a fragment program variant for the given key.
      *
      * NIR programs (stfp->state.type == PIPE_SHADER_IR_NIR) get the lowering
      * passes selected by the key applied to the shader returned by
      * get_nir_shader(), are finalized, and are handed to the driver's
      * create_fs_state().
      *
      * TGSI programs start from stfp->state.tokens.  Each step selected by the
      * key produces a new token array; intermediate arrays are freed as they
      * are replaced, while stfp->state.tokens itself is never freed here.  A
      * step that fails prints a message to stderr and the tokens from the
      * previous step are kept.
      *
      * Returns the new variant (with key and driver_shader filled in), or
      * NULL if the variant struct cannot be allocated.
      */
1208 static struct st_fp_variant *
1209 st_create_fp_variant(struct st_context *st,
1210                      struct st_program *stfp,
1211                      const struct st_fp_variant_key *key)
1212 {
1213    struct pipe_context *pipe = st->pipe;
1214    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1215    struct pipe_shader_state state = {0};
1216    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1217    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1218       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1219    static const gl_state_index16 scale_state[STATE_LENGTH] =
1220       { STATE_INTERNAL, STATE_PT_SCALE };
1221    static const gl_state_index16 bias_state[STATE_LENGTH] =
1222       { STATE_INTERNAL, STATE_PT_BIAS };
1223    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1224       { STATE_INTERNAL, STATE_ALPHA_REF };
1225
1226    if (!variant)
1227       return NULL;
1228
1229    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
           /* Set by every pass below that changes the shader, so that it is
            * finalized again before being given to the driver.
            */
1230       bool finalize = false;
1231
1232       state.type = PIPE_SHADER_IR_NIR;
1233       state.ir.nir = get_nir_shader(st, stfp);
1234
1235       if (key->clamp_color) {
1236          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1237          finalize = true;
1238       }
1239
1240       if (key->lower_flatshade) {
1241          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1242          finalize = true;
1243       }
1244
1245       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1246          _mesa_add_state_reference(params, alpha_ref_state);
1247          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1248                     false, alpha_ref_state);
1249          finalize = true;
1250       }
1251
1252       if (key->lower_two_sided_color) {
1253          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
1254          finalize = true;
1255       }
1256
1257       if (key->persample_shading) {
1258           nir_shader *shader = state.ir.nir;
1259           nir_foreach_variable(var, &shader->inputs)
1260              var->data.sample = true;
1261           finalize = true;
1262       }
1263
1264       assert(!(key->bitmap && key->drawpixels));
1265
1266       /* glBitmap */
1267       if (key->bitmap) {
1268          nir_lower_bitmap_options options = {0};
1269
              /* ffs(~mask) - 1 is the lowest sampler index whose bit is clear
               * in SamplersUsed.
               */
1270          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1271          options.sampler = variant->bitmap_sampler;
1272          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1273
1274          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1275          finalize = true;
1276       }
1277
1278       /* glDrawPixels (color only) */
1279       if (key->drawpixels) {
1280          nir_lower_drawpixels_options options = {{0}};
1281          unsigned samplers_used = stfp->Base.SamplersUsed;
1282
1283          /* Find the first unused slot. */
1284          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1285          options.drawpix_sampler = variant->drawpix_sampler;
1286          samplers_used |= (1 << variant->drawpix_sampler);
1287
1288          options.pixel_maps = key->pixelMaps;
1289          if (key->pixelMaps) {
1290             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1291             options.pixelmap_sampler = variant->pixelmap_sampler;
1292          }
1293
1294          options.scale_and_bias = key->scaleAndBias;
1295          if (key->scaleAndBias) {
1296             _mesa_add_state_reference(params, scale_state);
1297             memcpy(options.scale_state_tokens, scale_state,
1298                    sizeof(options.scale_state_tokens));
1299             _mesa_add_state_reference(params, bias_state);
1300             memcpy(options.bias_state_tokens, bias_state,
1301                    sizeof(options.bias_state_tokens));
1302          }
1303
1304          _mesa_add_state_reference(params, texcoord_state);
1305          memcpy(options.texcoord_state_tokens, texcoord_state,
1306                 sizeof(options.texcoord_state_tokens));
1307
1308          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1309          finalize = true;
1310       }
1311
1312       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1313                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1314                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1315
1316          st_nir_lower_samplers(pipe->screen, state.ir.nir,
1317                                stfp->shader_program, &stfp->Base);
1318
1319          nir_lower_tex_options options = {0};
1320          options.lower_y_uv_external = key->external.lower_nv12;
1321          options.lower_y_u_v_external = key->external.lower_iyuv;
1322          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1323          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1324          options.lower_ayuv_external = key->external.lower_ayuv;
1325          options.lower_xyuv_external = key->external.lower_xyuv;
1326          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1327          finalize = true;
1328       }
1329
1330       if (finalize || !st->allow_st_finalize_nir_twice) {
1331          st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
1332                          false);
1333       }
1334
1335       /* This pass needs to happen *after* nir_lower_sampler */
1336       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1337                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1338                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1339          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1340                     ~stfp->Base.SamplersUsed,
1341                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1342                        key->external.lower_yx_xuxv,
1343                     key->external.lower_iyuv);
1344          finalize = true;
1345       }
1346
1347       if (finalize || !st->allow_st_finalize_nir_twice) {
1348          /* Some of the lowering above may have introduced new varyings */
1349          nir_shader_gather_info(state.ir.nir,
1350                                 nir_shader_get_entrypoint(state.ir.nir));
1351
1352          struct pipe_screen *screen = pipe->screen;
1353          if (screen->finalize_nir)
1354             screen->finalize_nir(screen, state.ir.nir, false);
1355       }
1356
1357       if (ST_DEBUG & DEBUG_PRINT_IR)
1358          nir_print_shader(state.ir.nir, stderr);
1359
1360       variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1361       variant->key = *key;
1362
1363       return variant;
1364    }
1365
        /* TGSI path.  state.tokens starts out aliasing the program's own
         * tokens; every "state.tokens != stfp->state.tokens" test below means
         * "state.tokens is an intermediate copy that this function owns".
         */
1366    state.tokens = stfp->state.tokens;
1367
1368    assert(!(key->bitmap && key->drawpixels));
1369
1370    /* Fix texture targets and add fog for ATI_fs */
1371    if (stfp->ati_fs) {
1372       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1373
1374       if (tokens)
1375          state.tokens = tokens;
1376       else
1377          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1378    }
1379
1380    /* Emulate features. */
1381    if (key->clamp_color || key->persample_shading) {
1382       const struct tgsi_token *tokens;
1383       unsigned flags =
1384          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1385          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1386
1387       tokens = tgsi_emulate(state.tokens, flags);
1388
1389       if (tokens) {
1390          if (state.tokens != stfp->state.tokens)
1391             tgsi_free_tokens(state.tokens);
1392          state.tokens = tokens;
1393       } else
1394          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1395    }
1396
1397    /* glBitmap */
1398    if (key->bitmap) {
1399       const struct tgsi_token *tokens;
1400
1401       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1402
1403       tokens = st_get_bitmap_shader(state.tokens,
1404                                     st->internal_target,
1405                                     variant->bitmap_sampler,
1406                                     st->needs_texcoord_semantic,
1407                                     st->bitmap.tex_format ==
1408                                     PIPE_FORMAT_R8_UNORM);
1409
1410       if (tokens) {
1411          if (state.tokens != stfp->state.tokens)
1412             tgsi_free_tokens(state.tokens);
1413          state.tokens = tokens;
1414       } else
1415          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1416    }
1417
1418    /* glDrawPixels (color only) */
1419    if (key->drawpixels) {
1420       const struct tgsi_token *tokens;
1421       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1422
1423       /* Find the first unused slot. */
1424       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1425
1426       if (key->pixelMaps) {
1427          unsigned samplers_used = stfp->Base.SamplersUsed |
1428                                   (1 << variant->drawpix_sampler);
1429
1430          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1431       }
1432
1433       if (key->scaleAndBias) {
1434          scale_const = _mesa_add_state_reference(params, scale_state);
1435          bias_const = _mesa_add_state_reference(params, bias_state);
1436       }
1437
1438       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1439
1440       tokens = st_get_drawpix_shader(state.tokens,
1441                                      st->needs_texcoord_semantic,
1442                                      key->scaleAndBias, scale_const,
1443                                      bias_const, key->pixelMaps,
1444                                      variant->drawpix_sampler,
1445                                      variant->pixelmap_sampler,
1446                                      texcoord_const, st->internal_target);
1447
1448       if (tokens) {
1449          if (state.tokens != stfp->state.tokens)
1450             tgsi_free_tokens(state.tokens);
1451          state.tokens = tokens;
1452       } else
1453          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1454    }
1455
1456    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1457                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1458       const struct tgsi_token *tokens;
1459
1460       /* samplers inserted would conflict, but this should be unpossible: */
1461       assert(!(key->bitmap || key->drawpixels));
1462
1463       tokens = st_tgsi_lower_yuv(state.tokens,
1464                                  ~stfp->Base.SamplersUsed,
1465                                  key->external.lower_nv12 ||
1466                                     key->external.lower_xy_uxvx ||
1467                                     key->external.lower_yx_xuxv,
1468                                  key->external.lower_iyuv);
1469       if (tokens) {
1470          if (state.tokens != stfp->state.tokens)
1471             tgsi_free_tokens(state.tokens);
1472          state.tokens = tokens;
1473       } else {
1474          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1475       }
1476    }
1477
1478    if (key->lower_depth_clamp) {
1479       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1480
1481       const struct tgsi_token *tokens;
1482       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
           /* Apply the same ownership rule as the steps above.  On failure
            * keep the tokens we already have instead of handing NULL to the
            * driver.  Only free an intermediate copy once a *different* token
            * array has replaced it: the helper is not visible from here, so
            * this also covers it returning its input unchanged.
            */
1483       if (tokens) {
              if (tokens != state.tokens &&
                  state.tokens != stfp->state.tokens)
1484             tgsi_free_tokens(state.tokens);
1485          state.tokens = tokens;
           } else
              fprintf(stderr, "mesa: cannot lower depth clamp\n");
1486    }
1487
1488    if (ST_DEBUG & DEBUG_PRINT_IR)
1489       tgsi_dump(state.tokens, 0);
1490
1491    /* fill in variant */
1492    variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1493    variant->key = *key;
1494
        /* Release the last intermediate copy, if any; the program keeps
         * ownership of stfp->state.tokens.
         */
1495    if (state.tokens != stfp->state.tokens)
1496       tgsi_free_tokens(state.tokens);
1497    return variant;
1498 }
1499
1500 /**
1501  * Find/create a fragment program variant.
      *
      * The list at stfp->variants is searched for an entry whose key is
      * byte-for-byte equal to *key (memcmp over the whole key struct).  When
      * none matches, a new variant is created with st_create_fp_variant() and
      * linked into the list: bitmap/drawpixels variants go after the first
      * entry (or become the only entry of an empty list), all other variants
      * go to the head.
      *
      * Returns the matching or newly created variant, or NULL when
      * st_create_fp_variant() returns NULL.
1502  */
1503 struct st_fp_variant *
1504 st_get_fp_variant(struct st_context *st,
1505                   struct st_program *stfp,
1506                   const struct st_fp_variant_key *key)
1507 {
1508    struct st_fp_variant *fpv;
1509
1510    /* Search for existing variant */
1511    for (fpv = st_fp_variant(stfp->variants); fpv;
1512         fpv = st_fp_variant(fpv->base.next)) {
1513       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1514          break;
1515       }
1516    }
1517
1518    if (!fpv) {
1519       /* create new */
1520       fpv = st_create_fp_variant(st, stfp, key);
1521       if (fpv) {
1522          fpv->base.st = key->st;
1523
1524          if (key->bitmap || key->drawpixels) {
1525             /* Regular variants should always come before the
1526              * bitmap & drawpixels variants, (unless there
1527              * are no regular variants) so that
1528              * st_update_fp can take a fast path when
1529              * shader_has_one_variant is set.
1530              */
1531             if (!stfp->variants) {
                    /* Empty list: fpv->base.next is not assigned here and
                     * keeps whatever st_create_fp_variant() left in it.
                     */
1532                stfp->variants = &fpv->base;
1533             } else {
1534                /* insert into list after the first one */
1535                fpv->base.next = stfp->variants->next;
1536                stfp->variants->next = &fpv->base;
1537             }
1538          } else {
1539             /* insert into list */
1540             fpv->base.next = stfp->variants;
1541             stfp->variants = &fpv->base;
1542          }
1543       }
1544    }
1545
1546    return fpv;
1547 }
1548
1549 /**
1550  * Translate a program. This is common code for geometry and tessellation
1551  * shaders.
1552  */
1553 bool
1554 st_translate_common_program(struct st_context *st,
1555                             struct st_program *stp)
1556 {
1557    struct gl_program *prog = &stp->Base;
1558    enum pipe_shader_type stage =
1559       pipe_shader_type_from_mesa(stp->Base.info.stage);
1560    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1561
1562    if (ureg == NULL)
1563       return false;
1564
1565    switch (stage) {
1566    case PIPE_SHADER_TESS_CTRL:
1567       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1568                     stp->Base.info.tess.tcs_vertices_out);
1569       break;
1570
1571    case PIPE_SHADER_TESS_EVAL:
1572       if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
1573          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1574       else
1575          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1576                        stp->Base.info.tess.primitive_mode);
1577
1578       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1579       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1580                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1581       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1582                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1583
1584       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1585                     (stp->Base.info.tess.spacing + 1) % 3);
1586
1587       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1588                     !stp->Base.info.tess.ccw);
1589       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1590                     stp->Base.info.tess.point_mode);
1591       break;
1592
1593    case PIPE_SHADER_GEOMETRY:
1594       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1595                     stp->Base.info.gs.input_primitive);
1596       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1597                     stp->Base.info.gs.output_primitive);
1598       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1599                     stp->Base.info.gs.vertices_out);
1600       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1601                     stp->Base.info.gs.invocations);
1602       break;
1603
1604    default:
1605       break;
1606    }
1607
1608    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1609    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1610    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1611    GLuint attr;
1612
1613    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1614    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1615    uint num_inputs = 0;
1616
1617    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1618    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1619    uint num_outputs = 0;
1620
1621    GLint i;
1622
1623    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1624    memset(inputMapping, 0, sizeof(inputMapping));
1625    memset(outputMapping, 0, sizeof(outputMapping));
1626    memset(&stp->state, 0, sizeof(stp->state));
1627
1628    if (prog->info.clip_distance_array_size)
1629       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1630                     prog->info.clip_distance_array_size);
1631    if (prog->info.cull_distance_array_size)
1632       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1633                     prog->info.cull_distance_array_size);
1634
1635    /*
1636     * Convert Mesa program inputs to TGSI input register semantics.
1637     */
1638    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1639       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1640          continue;
1641
1642       unsigned slot = num_inputs++;
1643
1644       inputMapping[attr] = slot;
1645       inputSlotToAttr[slot] = attr;
1646
1647       unsigned semantic_name, semantic_index;
1648       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1649                                    &semantic_name, &semantic_index);
1650       input_semantic_name[slot] = semantic_name;
1651       input_semantic_index[slot] = semantic_index;
1652    }
1653
1654    /* Also add patch inputs. */
1655    for (attr = 0; attr < 32; attr++) {
1656       if (prog->info.patch_inputs_read & (1u << attr)) {
1657          GLuint slot = num_inputs++;
1658          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1659
1660          inputMapping[patch_attr] = slot;
1661          inputSlotToAttr[slot] = patch_attr;
1662          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1663          input_semantic_index[slot] = attr;
1664       }
1665    }
1666
1667    /* initialize output semantics to defaults */
1668    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1669       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1670       output_semantic_index[i] = 0;
1671    }
1672
1673    /*
1674     * Determine number of outputs, the (default) output register
1675     * mapping and the semantic information for each output.
1676     */
1677    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1678       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1679          GLuint slot = num_outputs++;
1680
1681          outputMapping[attr] = slot;
1682
1683          unsigned semantic_name, semantic_index;
1684          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1685                                       &semantic_name, &semantic_index);
1686          output_semantic_name[slot] = semantic_name;
1687          output_semantic_index[slot] = semantic_index;
1688       }
1689    }
1690
1691    /* Also add patch outputs. */
1692    for (attr = 0; attr < 32; attr++) {
1693       if (prog->info.patch_outputs_written & (1u << attr)) {
1694          GLuint slot = num_outputs++;
1695          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1696
1697          outputMapping[patch_attr] = slot;
1698          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1699          output_semantic_index[slot] = attr;
1700       }
1701    }
1702
1703    st_translate_program(st->ctx,
1704                         stage,
1705                         ureg,
1706                         stp->glsl_to_tgsi,
1707                         prog,
1708                         /* inputs */
1709                         num_inputs,
1710                         inputMapping,
1711                         inputSlotToAttr,
1712                         input_semantic_name,
1713                         input_semantic_index,
1714                         NULL,
1715                         /* outputs */
1716                         num_outputs,
1717                         outputMapping,
1718                         output_semantic_name,
1719                         output_semantic_index);
1720
1721    stp->state.tokens = ureg_get_tokens(ureg, NULL);
1722
1723    ureg_destroy(ureg);
1724
1725    st_translate_stream_output_info(prog);
1726
1727    st_store_ir_in_disk_cache(st, prog, false);
1728
1729    if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1730       _mesa_print_program(prog);
1731
1732    free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1733    stp->glsl_to_tgsi = NULL;
1734    return true;
1735 }
1736
1737
1738 /**
1739  * Get/create a basic program variant.
1740  */
1741 struct st_variant *
1742 st_get_common_variant(struct st_context *st,
1743                       struct st_program *prog,
1744                       const struct st_common_variant_key *key)
1745 {
1746    struct pipe_context *pipe = st->pipe;
1747    struct st_variant *v;
1748    struct pipe_shader_state state = {0};
1749
1750    /* Search for existing variant */
1751    for (v = prog->variants; v; v = v->next) {
1752       if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
1753          break;
1754    }
1755
1756    if (!v) {
1757       /* create new */
1758       v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
1759       if (v) {
1760          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1761             bool finalize = false;
1762
1763             state.type = PIPE_SHADER_IR_NIR;
1764             state.ir.nir = get_nir_shader(st, prog);
1765
1766             if (key->clamp_color) {
1767                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1768                finalize = true;
1769             }
1770
1771             state.stream_output = prog->state.stream_output;
1772
1773             if (finalize || !st->allow_st_finalize_nir_twice) {
1774                st_finalize_nir(st, &prog->Base, prog->shader_program,
1775                                state.ir.nir, true);
1776             }
1777
1778             if (ST_DEBUG & DEBUG_PRINT_IR)
1779                nir_print_shader(state.ir.nir, stderr);
1780          } else {
1781             if (key->lower_depth_clamp) {
1782                struct gl_program_parameter_list *params = prog->Base.Parameters;
1783
1784                unsigned depth_range_const =
1785                      _mesa_add_state_reference(params, depth_range_state);
1786
1787                const struct tgsi_token *tokens;
1788                tokens =
1789                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1790                                                depth_range_const,
1791                                                key->clip_negative_one_to_one);
1792
1793                if (tokens != prog->state.tokens)
1794                   tgsi_free_tokens(prog->state.tokens);
1795
1796                prog->state.tokens = tokens;
1797             }
1798             state = prog->state;
1799
1800             if (ST_DEBUG & DEBUG_PRINT_IR)
1801                tgsi_dump(state.tokens, 0);
1802          }
1803          /* fill in new variant */
1804          switch (prog->Base.info.stage) {
1805          case MESA_SHADER_TESS_CTRL:
1806             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1807             break;
1808          case MESA_SHADER_TESS_EVAL:
1809             v->driver_shader = pipe->create_tes_state(pipe, &state);
1810             break;
1811          case MESA_SHADER_GEOMETRY:
1812             v->driver_shader = pipe->create_gs_state(pipe, &state);
1813             break;
1814          case MESA_SHADER_COMPUTE: {
1815             struct pipe_compute_state cs = {0};
1816             cs.ir_type = state.type;
1817             cs.req_local_mem = prog->Base.info.cs.shared_size;
1818
1819             if (state.type == PIPE_SHADER_IR_NIR)
1820                cs.prog = state.ir.nir;
1821             else
1822                cs.prog = state.tokens;
1823
1824             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1825             break;
1826          }
1827          default:
1828             assert(!"unhandled shader type");
1829             free(v);
1830             return NULL;
1831          }
1832
1833          st_common_variant(v)->key = *key;
1834          v->st = key->st;
1835
1836          /* insert into list */
1837          v->next = prog->variants;
1838          prog->variants = v;
1839       }
1840    }
1841
1842    return v;
1843 }
1844
1845
1846 /**
1847  * Vert/Geom/Frag programs have per-context variants.  Free all the
1848  * variants attached to the given program which match the given context.
1849  */
1850 static void
1851 destroy_program_variants(struct st_context *st, struct gl_program *target)
1852 {
1853    if (!target || target == &_mesa_DummyProgram)
1854       return;
1855
1856    struct st_program *p = st_program(target);
1857    struct st_variant *v, **prevPtr = &p->variants;
1858    bool unbound = false;
1859
1860    for (v = p->variants; v; ) {
1861       struct st_variant *next = v->next;
1862       if (v->st == st) {
1863          if (!unbound) {
1864             st_unbind_program(st, p);
1865             unbound = true;
1866          }
1867
1868          /* unlink from list */
1869          *prevPtr = next;
1870          /* destroy this variant */
1871          delete_variant(st, v, target->Target);
1872       }
1873       else {
1874          prevPtr = &v->next;
1875       }
1876       v = next;
1877    }
1878 }
1879
1880
1881 /**
1882  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1883  * which match the given context.
1884  */
1885 static void
1886 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1887 {
1888    struct st_context *st = (struct st_context *) userData;
1889    struct gl_shader *shader = (struct gl_shader *) data;
1890
1891    switch (shader->Type) {
1892    case GL_SHADER_PROGRAM_MESA:
1893       {
1894          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1895          GLuint i;
1896
1897          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1898             if (shProg->_LinkedShaders[i])
1899                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1900          }
1901       }
1902       break;
1903    case GL_VERTEX_SHADER:
1904    case GL_FRAGMENT_SHADER:
1905    case GL_GEOMETRY_SHADER:
1906    case GL_TESS_CONTROL_SHADER:
1907    case GL_TESS_EVALUATION_SHADER:
1908    case GL_COMPUTE_SHADER:
1909       break;
1910    default:
1911       assert(0);
1912    }
1913 }
1914
1915
1916 /**
1917  * Callback for _mesa_HashWalk.  Free all the program variants which match
1918  * the given context.
1919  */
1920 static void
1921 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1922 {
1923    struct st_context *st = (struct st_context *) userData;
1924    struct gl_program *program = (struct gl_program *) data;
1925    destroy_program_variants(st, program);
1926 }
1927
1928
1929 /**
1930  * Walk over all shaders and programs to delete any variants which
1931  * belong to the given context.
1932  * This is called during context tear-down.
1933  */
1934 void
1935 st_destroy_program_variants(struct st_context *st)
1936 {
1937    /* If shaders can be shared with other contexts, the last context will
1938     * call DeleteProgram on all shaders, releasing everything.
1939     */
1940    if (st->has_shareable_shaders)
1941       return;
1942
1943    /* ARB vert/frag program */
1944    _mesa_HashWalk(st->ctx->Shared->Programs,
1945                   destroy_program_variants_cb, st);
1946
1947    /* GLSL vert/frag/geom shaders */
1948    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1949                   destroy_shader_program_variants_cb, st);
1950 }
1951
1952
1953 /**
1954  * Compile one shader variant.
1955  */
1956 static void
1957 st_precompile_shader_variant(struct st_context *st,
1958                              struct gl_program *prog)
1959 {
1960    switch (prog->Target) {
1961    case GL_VERTEX_PROGRAM_ARB: {
1962       struct st_program *p = (struct st_program *)prog;
1963       struct st_common_variant_key key;
1964
1965       memset(&key, 0, sizeof(key));
1966
1967       key.st = st->has_shareable_shaders ? NULL : st;
1968       st_get_vp_variant(st, p, &key);
1969       break;
1970    }
1971
1972    case GL_FRAGMENT_PROGRAM_ARB: {
1973       struct st_program *p = (struct st_program *)prog;
1974       struct st_fp_variant_key key;
1975
1976       memset(&key, 0, sizeof(key));
1977
1978       key.st = st->has_shareable_shaders ? NULL : st;
1979       st_get_fp_variant(st, p, &key);
1980       break;
1981    }
1982
1983    case GL_TESS_CONTROL_PROGRAM_NV:
1984    case GL_TESS_EVALUATION_PROGRAM_NV:
1985    case GL_GEOMETRY_PROGRAM_NV:
1986    case GL_COMPUTE_PROGRAM_NV: {
1987       struct st_program *p = st_program(prog);
1988       struct st_common_variant_key key;
1989
1990       memset(&key, 0, sizeof(key));
1991
1992       key.st = st->has_shareable_shaders ? NULL : st;
1993       st_get_common_variant(st, p, &key);
1994       break;
1995    }
1996
1997    default:
1998       assert(0);
1999    }
2000 }
2001
2002 void
2003 st_serialize_nir(struct st_program *stp)
2004 {
2005    if (!stp->serialized_nir) {
2006       struct blob blob;
2007       size_t size;
2008
2009       blob_init(&blob);
2010       nir_serialize(&blob, stp->Base.nir, false);
2011       blob_finish_get_buffer(&blob, &stp->serialized_nir, &size);
2012       stp->serialized_nir_size = size;
2013    }
2014 }
2015
2016 void
2017 st_finalize_program(struct st_context *st, struct gl_program *prog)
2018 {
2019    if (st->current_program[prog->info.stage] == prog) {
2020       if (prog->info.stage == MESA_SHADER_VERTEX)
2021          st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
2022       else
2023          st->dirty |= ((struct st_program *)prog)->affected_states;
2024    }
2025
2026    if (prog->nir) {
2027       nir_sweep(prog->nir);
2028
2029       /* This is only needed for ARB_vp/fp programs and when the disk cache
2030        * is disabled. If the disk cache is enabled, GLSL programs are
2031        * serialized in write_nir_to_cache.
2032        */
2033       st_serialize_nir(st_program(prog));
2034    }
2035
2036    /* Create Gallium shaders now instead of on demand. */
2037    if (ST_DEBUG & DEBUG_PRECOMPILE ||
2038        st->shader_has_one_variant[prog->info.stage])
2039       st_precompile_shader_variant(st, prog);
2040 }