src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44 #include "compiler/nir/nir_serialize.h"
  45 #include "draw/draw_context.h"
  46
  47 #include "pipe/p_context.h"
  48 #include "pipe/p_defines.h"
  49 #include "pipe/p_shader_tokens.h"
  50 #include "draw/draw_context.h"
  51 #include "tgsi/tgsi_dump.h"
  52 #include "tgsi/tgsi_emulate.h"
  53 #include "tgsi/tgsi_parse.h"
  54 #include "tgsi/tgsi_ureg.h"
  55
  56 #include "util/u_memory.h"
  57
  58 #include "st_debug.h"
  59 #include "st_cb_bitmap.h"
  60 #include "st_cb_drawpixels.h"
  61 #include "st_context.h"
  62 #include "st_tgsi_lower_depth_clamp.h"
  63 #include "st_tgsi_lower_yuv.h"
  64 #include "st_program.h"
  65 #include "st_mesa_to_tgsi.h"
  66 #include "st_atifs_to_tgsi.h"
  67 #include "st_nir.h"
  68 #include "st_shader_cache.h"
  69 #include "st_util.h"
  70 #include "cso_cache/cso_context.h"
  71
  72
  73 static void
  74 destroy_program_variants(struct st_context *st, struct gl_program *target);
  75
  76 static void
  77 set_affected_state_flags(uint64_t *states,
  78                          struct gl_program *prog,
  79                          uint64_t new_constants,
  80                          uint64_t new_sampler_views,
  81                          uint64_t new_samplers,
  82                          uint64_t new_images,
  83                          uint64_t new_ubos,
  84                          uint64_t new_ssbos,
  85                          uint64_t new_atomics)
  86 {
  87    if (prog->Parameters->NumParameters)
  88       *states |= new_constants;
  89
  90    if (prog->info.num_textures)
  91       *states |= new_sampler_views | new_samplers;
  92
  93    if (prog->info.num_images)
  94       *states |= new_images;
  95
  96    if (prog->info.num_ubos)
  97       *states |= new_ubos;
  98
  99    if (prog->info.num_ssbos)
 100       *states |= new_ssbos;
 101
 102    if (prog->info.num_abos)
 103       *states |= new_atomics;
 104 }
 105
 106 /**
 107  * This determines which states will be updated when the shader is bound.
 108  */
 109 void
 110 st_set_prog_affected_state_flags(struct gl_program *prog)
 111 {
 112    uint64_t *states;
 113
 114    switch (prog->info.stage) {
 115    case MESA_SHADER_VERTEX:
 116       states = &((struct st_program*)prog)->affected_states;
 117
 118       *states = ST_NEW_VS_STATE |
 119                 ST_NEW_RASTERIZER |
 120                 ST_NEW_VERTEX_ARRAYS;
 121
 122       set_affected_state_flags(states, prog,
 123                                ST_NEW_VS_CONSTANTS,
 124                                ST_NEW_VS_SAMPLER_VIEWS,
 125                                ST_NEW_VS_SAMPLERS,
 126                                ST_NEW_VS_IMAGES,
 127                                ST_NEW_VS_UBOS,
 128                                ST_NEW_VS_SSBOS,
 129                                ST_NEW_VS_ATOMICS);
 130       break;
 131
 132    case MESA_SHADER_TESS_CTRL:
 133       states = &(st_program(prog))->affected_states;
 134
 135       *states = ST_NEW_TCS_STATE;
 136
 137       set_affected_state_flags(states, prog,
 138                                ST_NEW_TCS_CONSTANTS,
 139                                ST_NEW_TCS_SAMPLER_VIEWS,
 140                                ST_NEW_TCS_SAMPLERS,
 141                                ST_NEW_TCS_IMAGES,
 142                                ST_NEW_TCS_UBOS,
 143                                ST_NEW_TCS_SSBOS,
 144                                ST_NEW_TCS_ATOMICS);
 145       break;
 146
 147    case MESA_SHADER_TESS_EVAL:
 148       states = &(st_program(prog))->affected_states;
 149
 150       *states = ST_NEW_TES_STATE |
 151                 ST_NEW_RASTERIZER;
 152
 153       set_affected_state_flags(states, prog,
 154                                ST_NEW_TES_CONSTANTS,
 155                                ST_NEW_TES_SAMPLER_VIEWS,
 156                                ST_NEW_TES_SAMPLERS,
 157                                ST_NEW_TES_IMAGES,
 158                                ST_NEW_TES_UBOS,
 159                                ST_NEW_TES_SSBOS,
 160                                ST_NEW_TES_ATOMICS);
 161       break;
 162
 163    case MESA_SHADER_GEOMETRY:
 164       states = &(st_program(prog))->affected_states;
 165
 166       *states = ST_NEW_GS_STATE |
 167                 ST_NEW_RASTERIZER;
 168
 169       set_affected_state_flags(states, prog,
 170                                ST_NEW_GS_CONSTANTS,
 171                                ST_NEW_GS_SAMPLER_VIEWS,
 172                                ST_NEW_GS_SAMPLERS,
 173                                ST_NEW_GS_IMAGES,
 174                                ST_NEW_GS_UBOS,
 175                                ST_NEW_GS_SSBOS,
 176                                ST_NEW_GS_ATOMICS);
 177       break;
 178
 179    case MESA_SHADER_FRAGMENT:
 180       states = &((struct st_program*)prog)->affected_states;
 181
 182       /* gl_FragCoord and glDrawPixels always use constants. */
 183       *states = ST_NEW_FS_STATE |
 184                 ST_NEW_SAMPLE_SHADING |
 185                 ST_NEW_FS_CONSTANTS;
 186
 187       set_affected_state_flags(states, prog,
 188                                ST_NEW_FS_CONSTANTS,
 189                                ST_NEW_FS_SAMPLER_VIEWS,
 190                                ST_NEW_FS_SAMPLERS,
 191                                ST_NEW_FS_IMAGES,
 192                                ST_NEW_FS_UBOS,
 193                                ST_NEW_FS_SSBOS,
 194                                ST_NEW_FS_ATOMICS);
 195       break;
 196
 197    case MESA_SHADER_COMPUTE:
 198       states = &((struct st_program*)prog)->affected_states;
 199
 200       *states = ST_NEW_CS_STATE;
 201
 202       set_affected_state_flags(states, prog,
 203                                ST_NEW_CS_CONSTANTS,
 204                                ST_NEW_CS_SAMPLER_VIEWS,
 205                                ST_NEW_CS_SAMPLERS,
 206                                ST_NEW_CS_IMAGES,
 207                                ST_NEW_CS_UBOS,
 208                                ST_NEW_CS_SSBOS,
 209                                ST_NEW_CS_ATOMICS);
 210       break;
 211
 212    default:
 213       unreachable("unhandled shader stage");
 214    }
 215 }
 216
 217
 218 /**
 219  * Delete a shader variant.  Note the caller must unlink the variant from
 220  * the linked list.
 221  */
 222 static void
 223 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
 224 {
 225    if (v->driver_shader) {
 226       if (target == GL_VERTEX_PROGRAM_ARB &&
 227           ((struct st_common_variant*)v)->key.is_draw_shader) {
 228          /* Draw shader. */
 229          draw_delete_vertex_shader(st->draw, v->driver_shader);
 230       } else if (st->has_shareable_shaders || v->st == st) {
 231          /* The shader's context matches the calling context, or we
 232           * don't care.
 233           */
 234          switch (target) {
 235          case GL_VERTEX_PROGRAM_ARB:
 236             st->pipe->delete_vs_state(st->pipe, v->driver_shader);
 237             break;
 238          case GL_TESS_CONTROL_PROGRAM_NV:
 239             st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
 240             break;
 241          case GL_TESS_EVALUATION_PROGRAM_NV:
 242             st->pipe->delete_tes_state(st->pipe, v->driver_shader);
 243             break;
 244          case GL_GEOMETRY_PROGRAM_NV:
 245             st->pipe->delete_gs_state(st->pipe, v->driver_shader);
 246             break;
 247          case GL_FRAGMENT_PROGRAM_ARB:
 248             st->pipe->delete_fs_state(st->pipe, v->driver_shader);
 249             break;
 250          case GL_COMPUTE_PROGRAM_NV:
 251             st->pipe->delete_compute_state(st->pipe, v->driver_shader);
 252             break;
 253          default:
 254             unreachable("bad shader type in delete_basic_variant");
 255          }
 256       } else {
 257          /* We can't delete a shader with a context different from the one
 258           * that created it.  Add it to the creating context's zombie list.
 259           */
 260          enum pipe_shader_type type =
 261             pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
 262
 263          st_save_zombie_shader(v->st, type, v->driver_shader);
 264       }
 265    }
 266
 267    free(v);
 268 }
 269
 270 static void
 271 st_unbind_program(struct st_context *st, struct st_program *p)
 272 {
 273    /* Unbind the shader in cso_context and re-bind in st/mesa. */
 274    switch (p->Base.info.stage) {
 275    case MESA_SHADER_VERTEX:
 276       cso_set_vertex_shader_handle(st->cso_context, NULL);
 277       st->dirty |= ST_NEW_VS_STATE;
 278       break;
 279    case MESA_SHADER_TESS_CTRL:
 280       cso_set_tessctrl_shader_handle(st->cso_context, NULL);
 281       st->dirty |= ST_NEW_TCS_STATE;
 282       break;
 283    case MESA_SHADER_TESS_EVAL:
 284       cso_set_tesseval_shader_handle(st->cso_context, NULL);
 285       st->dirty |= ST_NEW_TES_STATE;
 286       break;
 287    case MESA_SHADER_GEOMETRY:
 288       cso_set_geometry_shader_handle(st->cso_context, NULL);
 289       st->dirty |= ST_NEW_GS_STATE;
 290       break;
 291    case MESA_SHADER_FRAGMENT:
 292       cso_set_fragment_shader_handle(st->cso_context, NULL);
 293       st->dirty |= ST_NEW_FS_STATE;
 294       break;
 295    case MESA_SHADER_COMPUTE:
 296       cso_set_compute_shader_handle(st->cso_context, NULL);
 297       st->dirty |= ST_NEW_CS_STATE;
 298       break;
 299    default:
 300       unreachable("invalid shader type");
 301    }
 302 }
 303
 304 /**
 305  * Free all basic program variants.
 306  */
 307 void
 308 st_release_variants(struct st_context *st, struct st_program *p)
 309 {
 310    struct st_variant *v;
 311
 312    /* If we are releasing shaders, re-bind them, because we don't
 313     * know which shaders are bound in the driver.
 314     */
 315    if (p->variants)
 316       st_unbind_program(st, p);
 317
 318    for (v = p->variants; v; ) {
 319       struct st_variant *next = v->next;
 320       delete_variant(st, v, p->Base.Target);
 321       v = next;
 322    }
 323
 324    p->variants = NULL;
 325
 326    if (p->state.tokens) {
 327       ureg_free_tokens(p->state.tokens);
 328       p->state.tokens = NULL;
 329    }
 330
 331    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 332     * it has resulted in the driver taking ownership of the NIR.  Those
 333     * callers should be NULLing out the nir field in any pipe_shader_state
 334     * that might have this called in order to indicate that.
 335     *
 336     * GLSL IR and ARB programs will have set gl_program->nir to the same
 337     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 338     */
 339 }
 340
 341 /**
 342  * Free all basic program variants and unref program.
 343  */
 344 void
 345 st_release_program(struct st_context *st, struct st_program **p)
 346 {
 347    if (!*p)
 348       return;
 349
 350    destroy_program_variants(st, &((*p)->Base));
 351    st_reference_prog(st, p, NULL);
 352 }
 353
 354 void
 355 st_finalize_nir_before_variants(struct nir_shader *nir)
 356 {
 357    NIR_PASS_V(nir, nir_opt_access);
 358
 359    NIR_PASS_V(nir, nir_split_var_copies);
 360    NIR_PASS_V(nir, nir_lower_var_copies);
 361    if (nir->options->lower_all_io_to_temps ||
 362        nir->options->lower_all_io_to_elements ||
 363        nir->info.stage == MESA_SHADER_VERTEX ||
 364        nir->info.stage == MESA_SHADER_GEOMETRY) {
 365       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 366    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 367       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 368    }
 369
 370    st_nir_assign_vs_in_locations(nir);
 371 }
 372
 373 /**
 374  * Translate ARB (asm) program to NIR
 375  */
 376 static nir_shader *
 377 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 378                          gl_shader_stage stage)
 379 {
 380    struct pipe_screen *screen = st->pipe->screen;
 381    const struct gl_shader_compiler_options *options =
 382       &st->ctx->Const.ShaderCompilerOptions[stage];
 383
 384    /* Translate to NIR */
 385    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 386    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 387    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 388
 389    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
 390    NIR_PASS_V(nir, nir_lower_system_values);
 391    NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
 392
 393    /* Optimise NIR */
 394    NIR_PASS_V(nir, nir_opt_constant_folding);
 395    st_nir_opts(nir);
 396    st_finalize_nir_before_variants(nir);
 397
 398    if (st->allow_st_finalize_nir_twice)
 399       st_finalize_nir(st, prog, NULL, nir, true);
 400
 401    nir_validate_shader(nir, "after st/glsl finalize_nir");
 402
 403    return nir;
 404 }
 405
 406 void
 407 st_prepare_vertex_program(struct st_program *stp)
 408 {
 409    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 410
 411    stvp->num_inputs = 0;
 412    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 413    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 414
 415    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 416     * and TGSI generic input indexes, plus input attrib semantic info.
 417     */
 418    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 419       if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 420          stvp->input_to_index[attr] = stvp->num_inputs;
 421          stvp->index_to_input[stvp->num_inputs] = attr;
 422          stvp->num_inputs++;
 423
 424          if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 425             /* add placeholder for second part of a double attribute */
 426             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 427             stvp->num_inputs++;
 428          }
 429       }
 430    }
 431    /* pre-setup potentially unused edgeflag input */
 432    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 433    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 434
 435    /* Compute mapping of vertex program outputs to slots. */
 436    unsigned num_outputs = 0;
 437    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 438       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 439          stvp->result_to_output[attr] = num_outputs++;
 440    }
 441    /* pre-setup potentially unused edgeflag output */
 442    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 443 }
 444
 445 void
 446 st_translate_stream_output_info(struct gl_program *prog)
 447 {
 448    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 449    if (!info)
 450       return;
 451
 452    /* Determine the (default) output register mapping for each output. */
 453    unsigned num_outputs = 0;
 454    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 455    memset(output_mapping, 0, sizeof(output_mapping));
 456
 457    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 458       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 459          output_mapping[attr] = num_outputs++;
 460    }
 461
 462    /* Translate stream output info. */
 463    struct pipe_stream_output_info *so_info =
 464       &((struct st_program*)prog)->state.stream_output;
 465
 466    for (unsigned i = 0; i < info->NumOutputs; i++) {
 467       so_info->output[i].register_index =
 468          output_mapping[info->Outputs[i].OutputRegister];
 469       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 470       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 471       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 472       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 473       so_info->output[i].stream = info->Outputs[i].StreamId;
 474    }
 475
 476    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 477       so_info->stride[i] = info->Buffers[i].Stride;
 478    }
 479    so_info->num_outputs = info->NumOutputs;
 480 }
 481
 482 /**
 483  * Translate a vertex program.
 484  */
 485 bool
 486 st_translate_vertex_program(struct st_context *st,
 487                             struct st_program *stp)
 488 {
 489    struct ureg_program *ureg;
 490    enum pipe_error error;
 491    unsigned num_outputs = 0;
 492    unsigned attr;
 493    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 494    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 495
 496    if (stp->Base.arb.IsPositionInvariant)
 497       _mesa_insert_mvp_code(st->ctx, &stp->Base);
 498
 499    /* ARB_vp: */
 500    if (!stp->glsl_to_tgsi) {
 501       _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);
 502
 503       /* This determines which states will be updated when the assembly
 504        * shader is bound.
 505        */
 506       stp->affected_states = ST_NEW_VS_STATE |
 507                               ST_NEW_RASTERIZER |
 508                               ST_NEW_VERTEX_ARRAYS;
 509
 510       if (stp->Base.Parameters->NumParameters)
 511          stp->affected_states |= ST_NEW_VS_CONSTANTS;
 512
 513       /* Translate to NIR if preferred. */
 514       if (st->pipe->screen->get_shader_param(st->pipe->screen,
 515                                              PIPE_SHADER_VERTEX,
 516                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 517          assert(!stp->glsl_to_tgsi);
 518
 519          if (stp->Base.nir)
 520             ralloc_free(stp->Base.nir);
 521
 522          if (stp->serialized_nir) {
 523             free(stp->serialized_nir);
 524             stp->serialized_nir = NULL;
 525          }
 526
 527          stp->state.type = PIPE_SHADER_IR_NIR;
 528          stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
 529                                                   MESA_SHADER_VERTEX);
 530
 531          /* We must update stp->Base.info after translation and before
 532           * st_prepare_vertex_program is called, because inputs_read
 533           * may become outdated after NIR optimization passes.
 534           *
 535           * For ffvp/ARB_vp inputs_read is populated based
 536           * on declared attributes without taking their usage into
 537           * consideration. When creating shader variants we expect
 538           * that their inputs_read would match the base ones for
 539           * input mapping to work properly.
 540           */
 541          nir_shader_gather_info(stp->Base.nir,
 542                                 nir_shader_get_entrypoint(stp->Base.nir));
 543          st_nir_assign_vs_in_locations(stp->Base.nir);
 544          stp->Base.info = stp->Base.nir->info;
 545
 546          /* For st_draw_feedback, we need to generate TGSI too if draw doesn't
 547           * use LLVM.
 548           */
 549          if (draw_has_llvm()) {
 550             st_prepare_vertex_program(stp);
 551             return true;
 552          }
 553       }
 554    }
 555
 556    st_prepare_vertex_program(stp);
 557
 558    /* Get semantic names and indices. */
 559    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 560       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 561          unsigned slot = num_outputs++;
 562          unsigned semantic_name, semantic_index;
 563          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 564                                       &semantic_name, &semantic_index);
 565          output_semantic_name[slot] = semantic_name;
 566          output_semantic_index[slot] = semantic_index;
 567       }
 568    }
 569    /* pre-setup potentially unused edgeflag output */
 570    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 571    output_semantic_index[num_outputs] = 0;
 572
 573    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 574    if (ureg == NULL)
 575       return false;
 576
 577    if (stp->Base.info.clip_distance_array_size)
 578       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 579                     stp->Base.info.clip_distance_array_size);
 580    if (stp->Base.info.cull_distance_array_size)
 581       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 582                     stp->Base.info.cull_distance_array_size);
 583
 584    if (ST_DEBUG & DEBUG_MESA) {
 585       _mesa_print_program(&stp->Base);
 586       _mesa_print_program_parameters(st->ctx, &stp->Base);
 587       debug_printf("\n");
 588    }
 589
 590    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 591
 592    if (stp->glsl_to_tgsi) {
 593       error = st_translate_program(st->ctx,
 594                                    PIPE_SHADER_VERTEX,
 595                                    ureg,
 596                                    stp->glsl_to_tgsi,
 597                                    &stp->Base,
 598                                    /* inputs */
 599                                    stvp->num_inputs,
 600                                    stvp->input_to_index,
 601                                    NULL, /* inputSlotToAttr */
 602                                    NULL, /* input semantic name */
 603                                    NULL, /* input semantic index */
 604                                    NULL, /* interp mode */
 605                                    /* outputs */
 606                                    num_outputs,
 607                                    stvp->result_to_output,
 608                                    output_semantic_name,
 609                                    output_semantic_index);
 610
 611       st_translate_stream_output_info(&stp->Base);
 612
 613       free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
 614    } else
 615       error = st_translate_mesa_program(st->ctx,
 616                                         PIPE_SHADER_VERTEX,
 617                                         ureg,
 618                                         &stp->Base,
 619                                         /* inputs */
 620                                         stvp->num_inputs,
 621                                         stvp->input_to_index,
 622                                         NULL, /* input semantic name */
 623                                         NULL, /* input semantic index */
 624                                         NULL,
 625                                         /* outputs */
 626                                         num_outputs,
 627                                         stvp->result_to_output,
 628                                         output_semantic_name,
 629                                         output_semantic_index);
 630
 631    if (error) {
 632       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 633       _mesa_print_program(&stp->Base);
 634       debug_assert(0);
 635       return false;
 636    }
 637
 638    stp->state.tokens = ureg_get_tokens(ureg, NULL);
 639    ureg_destroy(ureg);
 640
 641    if (stp->glsl_to_tgsi) {
 642       stp->glsl_to_tgsi = NULL;
 643       st_store_ir_in_disk_cache(st, &stp->Base, false);
 644    }
 645
 646    return stp->state.tokens != NULL;
 647 }
 648
 649 static struct nir_shader *
 650 get_nir_shader(struct st_context *st, struct st_program *stp)
 651 {
 652    if (stp->Base.nir) {
 653       nir_shader *nir = stp->Base.nir;
 654
 655       /* The first shader variant takes ownership of NIR, so that there is
 656        * no cloning. Additional shader variants are always generated from
 657        * serialized NIR to save memory.
 658        */
 659       stp->Base.nir = NULL;
 660       assert(stp->serialized_nir && stp->serialized_nir_size);
 661       return nir;
 662    }
 663
 664    struct blob_reader blob_reader;
 665    const struct nir_shader_compiler_options *options =
 666       st->ctx->Const.ShaderCompilerOptions[stp->Base.info.stage].NirOptions;
 667
 668    blob_reader_init(&blob_reader, stp->serialized_nir, stp->serialized_nir_size);
 669    return nir_deserialize(NULL, options, &blob_reader);
 670 }
 671
 672 static void
 673 lower_ucp(struct st_context *st,
 674           struct nir_shader *nir,
 675           unsigned ucp_enables,
 676           struct gl_program_parameter_list *params)
 677 {
 678    if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)
 679       NIR_PASS_V(nir, nir_lower_clip_disable, ucp_enables);
 680    else {
 681       struct pipe_screen *screen = st->pipe->screen;
 682       bool can_compact = screen->get_param(screen,
 683                                            PIPE_CAP_NIR_COMPACT_ARRAYS);
 684       bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 685
 686       gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 687       for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 688          if (use_eye) {
 689             clipplane_state[i][0] = STATE_CLIPPLANE;
 690             clipplane_state[i][1] = i;
 691          } else {
 692             clipplane_state[i][0] = STATE_INTERNAL;
 693             clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 694             clipplane_state[i][2] = i;
 695          }
 696          _mesa_add_state_reference(params, clipplane_state[i]);
 697       }
 698
 699       if (nir->info.stage == MESA_SHADER_VERTEX) {
 700          NIR_PASS_V(nir, nir_lower_clip_vs, ucp_enables,
 701                     true, can_compact, clipplane_state);
 702       } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
 703          NIR_PASS_V(nir, nir_lower_clip_gs, ucp_enables,
 704                     can_compact, clipplane_state);
 705       }
 706
 707       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
 708                  nir_shader_get_entrypoint(nir), true, false);
 709       NIR_PASS_V(nir, nir_lower_global_vars_to_local);
 710    }
 711 }
 712
 713 static const gl_state_index16 depth_range_state[STATE_LENGTH] =
 714    { STATE_DEPTH_RANGE };
 715
 716 static struct st_common_variant *
 717 st_create_vp_variant(struct st_context *st,
 718                      struct st_program *stvp,
 719                      const struct st_common_variant_key *key)
 720 {
 721    struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
 722    struct pipe_context *pipe = st->pipe;
 723    struct pipe_shader_state state = {0};
 724
 725    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 726       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 727    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 728
 729    vpv->key = *key;
 730
 731    state.stream_output = stvp->state.stream_output;
 732
 733    if (stvp->state.type == PIPE_SHADER_IR_NIR &&
 734        (!key->is_draw_shader || draw_has_llvm())) {
 735       bool finalize = false;
 736
 737       state.type = PIPE_SHADER_IR_NIR;
 738       state.ir.nir = get_nir_shader(st, stvp);
 739       if (key->clamp_color) {
 740          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 741          finalize = true;
 742       }
 743       if (key->passthrough_edgeflags) {
 744          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 745          finalize = true;
 746       }
 747
 748       if (key->lower_point_size) {
 749          _mesa_add_state_reference(params, point_size_state);
 750          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 751                     point_size_state);
 752          finalize = true;
 753       }
 754
 755       if (key->lower_ucp) {
 756          lower_ucp(st, state.ir.nir, key->lower_ucp, params);
 757          finalize = true;
 758       }
 759
 760       if (finalize || !st->allow_st_finalize_nir_twice) {
 761          st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
 762                          true);
 763
 764          /* Some of the lowering above may have introduced new varyings */
 765          nir_shader_gather_info(state.ir.nir,
 766                                 nir_shader_get_entrypoint(state.ir.nir));
 767       }
 768
 769       if (ST_DEBUG & DEBUG_PRINT_IR)
 770          nir_print_shader(state.ir.nir, stderr);
 771
 772       if (key->is_draw_shader)
 773          vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 774       else
 775          vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 776
 777       return vpv;
 778    }
 779
 780    state.type = PIPE_SHADER_IR_TGSI;
 781    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 782
 783    /* Emulate features. */
 784    if (key->clamp_color || key->passthrough_edgeflags) {
 785       const struct tgsi_token *tokens;
 786       unsigned flags =
 787          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 788          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 789
 790       tokens = tgsi_emulate(state.tokens, flags);
 791
 792       if (tokens) {
 793          tgsi_free_tokens(state.tokens);
 794          state.tokens = tokens;
 795       } else {
 796          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 797       }
 798    }
 799
 800    if (key->lower_depth_clamp) {
 801       unsigned depth_range_const =
 802             _mesa_add_state_reference(params, depth_range_state);
 803
 804       const struct tgsi_token *tokens;
 805       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 806                                          key->clip_negative_one_to_one);
 807       if (tokens != state.tokens)
 808          tgsi_free_tokens(state.tokens);
 809       state.tokens = tokens;
 810    }
 811
 812    if (ST_DEBUG & DEBUG_PRINT_IR)
 813       tgsi_dump(state.tokens, 0);
 814
 815    if (key->is_draw_shader)
 816       vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 817    else
 818       vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 819
 820    if (state.tokens) {
 821       tgsi_free_tokens(state.tokens);
 822    }
 823
 824    return vpv;
 825 }
 826
 827
 828 /**
 829  * Find/create a vertex program variant.
 830  */
 831 struct st_common_variant *
 832 st_get_vp_variant(struct st_context *st,
 833                   struct st_program *stp,
 834                   const struct st_common_variant_key *key)
 835 {
 836    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 837    struct st_common_variant *vpv;
 838
 839    /* Search for existing variant */
 840    for (vpv = st_common_variant(stp->variants); vpv;
 841         vpv = st_common_variant(vpv->base.next)) {
 842       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 843          break;
 844       }
 845    }
 846
 847    if (!vpv) {
 848       /* create now */
 849       vpv = st_create_vp_variant(st, stp, key);
 850       if (vpv) {
 851          vpv->base.st = key->st;
 852
 853          unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
 854          for (unsigned index = 0; index < num_inputs; ++index) {
 855             unsigned attr = stvp->index_to_input[index];
 856             if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 857                continue;
 858             vpv->vert_attrib_mask |= 1u << attr;
 859          }
 860
 861          /* insert into list */
 862          vpv->base.next = stp->variants;
 863          stp->variants = &vpv->base;
 864       }
 865    }
 866
 867    return vpv;
 868 }
 869
 870
 871 /**
 872  * Translate a Mesa fragment shader into a TGSI shader.
 873  */
 874 bool
 875 st_translate_fragment_program(struct st_context *st,
 876                               struct st_program *stfp)
 877 {
 878    /* Non-GLSL programs: */
 879    if (!stfp->glsl_to_tgsi) {
 880       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 881       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 882          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 883
 884       /* This determines which states will be updated when the assembly
 885        * shader is bound.
 886        *
 887        * fragment.position and glDrawPixels always use constants.
 888        */
 889       stfp->affected_states = ST_NEW_FS_STATE |
 890                               ST_NEW_SAMPLE_SHADING |
 891                               ST_NEW_FS_CONSTANTS;
 892
 893       if (stfp->ati_fs) {
 894          /* Just set them for ATI_fs unconditionally. */
 895          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 896                                   ST_NEW_FS_SAMPLERS;
 897       } else {
 898          /* ARB_fp */
 899          if (stfp->Base.SamplersUsed)
 900             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 901                                      ST_NEW_FS_SAMPLERS;
 902       }
 903
 904       /* Translate to NIR. */
 905       if (!stfp->ati_fs &&
 906           st->pipe->screen->get_shader_param(st->pipe->screen,
 907                                              PIPE_SHADER_FRAGMENT,
 908                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 909          nir_shader *nir =
 910             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 911
 912          if (stfp->Base.nir)
 913             ralloc_free(stfp->Base.nir);
 914          if (stfp->serialized_nir) {
 915             free(stfp->serialized_nir);
 916             stfp->serialized_nir = NULL;
 917          }
 918          stfp->state.type = PIPE_SHADER_IR_NIR;
 919          stfp->Base.nir = nir;
 920          return true;
 921       }
 922    }
 923
 924    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 925    ubyte inputMapping[VARYING_SLOT_MAX];
 926    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 927    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 928    GLuint attr;
 929    GLbitfield64 inputsRead;
 930    struct ureg_program *ureg;
 931
 932    GLboolean write_all = GL_FALSE;
 933
 934    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 935    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 936    uint fs_num_inputs = 0;
 937
 938    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 939    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 940    uint fs_num_outputs = 0;
 941
 942    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 943
 944    /*
 945     * Convert Mesa program inputs to TGSI input register semantics.
 946     */
 947    inputsRead = stfp->Base.info.inputs_read;
 948    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 949       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 950          const GLuint slot = fs_num_inputs++;
 951
 952          inputMapping[attr] = slot;
 953          inputSlotToAttr[slot] = attr;
 954
 955          switch (attr) {
 956          case VARYING_SLOT_POS:
 957             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 958             input_semantic_index[slot] = 0;
 959             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 960             break;
 961          case VARYING_SLOT_COL0:
 962             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 963             input_semantic_index[slot] = 0;
 964             interpMode[slot] = stfp->glsl_to_tgsi ?
 965                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 966             break;
 967          case VARYING_SLOT_COL1:
 968             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 969             input_semantic_index[slot] = 1;
 970             interpMode[slot] = stfp->glsl_to_tgsi ?
 971                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 972             break;
 973          case VARYING_SLOT_FOGC:
 974             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 975             input_semantic_index[slot] = 0;
 976             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 977             break;
 978          case VARYING_SLOT_FACE:
 979             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 980             input_semantic_index[slot] = 0;
 981             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 982             break;
 983          case VARYING_SLOT_PRIMITIVE_ID:
 984             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 985             input_semantic_index[slot] = 0;
 986             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 987             break;
 988          case VARYING_SLOT_LAYER:
 989             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 990             input_semantic_index[slot] = 0;
 991             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 992             break;
 993          case VARYING_SLOT_VIEWPORT:
 994             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 995             input_semantic_index[slot] = 0;
 996             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 997             break;
 998          case VARYING_SLOT_CLIP_DIST0:
 999             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
1000             input_semantic_index[slot] = 0;
1001             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
1002             break;
1003          case VARYING_SLOT_CLIP_DIST1:
1004             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
1005             input_semantic_index[slot] = 1;
1006             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
1007             break;
1008          case VARYING_SLOT_CULL_DIST0:
1009          case VARYING_SLOT_CULL_DIST1:
1010             /* these should have been lowered by GLSL */
1011             assert(0);
1012             break;
1013             /* In most cases, there is nothing special about these
1014              * inputs, so adopt a convention to use the generic
1015              * semantic name and the mesa VARYING_SLOT_ number as the
1016              * index.
1017              *
1018              * All that is required is that the vertex shader labels
1019              * its own outputs similarly, and that the vertex shader
1020              * generates at least every output required by the
1021              * fragment shader plus fixed-function hardware (such as
1022              * BFC).
1023              *
1024              * However, some drivers may need us to identify the PNTC and TEXi
1025              * varyings if, for example, their capability to replace them with
1026              * sprite coordinates is limited.
1027              */
1028          case VARYING_SLOT_PNTC:
1029             if (st->needs_texcoord_semantic) {
1030                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
1031                input_semantic_index[slot] = 0;
1032                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1033                break;
1034             }
1035             /* fall through */
1036          case VARYING_SLOT_TEX0:
1037          case VARYING_SLOT_TEX1:
1038          case VARYING_SLOT_TEX2:
1039          case VARYING_SLOT_TEX3:
1040          case VARYING_SLOT_TEX4:
1041          case VARYING_SLOT_TEX5:
1042          case VARYING_SLOT_TEX6:
1043          case VARYING_SLOT_TEX7:
1044             if (st->needs_texcoord_semantic) {
1045                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
1046                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
1047                interpMode[slot] = stfp->glsl_to_tgsi ?
1048                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1049                break;
1050             }
1051             /* fall through */
1052          case VARYING_SLOT_VAR0:
1053          default:
1054             /* Semantic indices should be zero-based because drivers may choose
1055              * to assign a fixed slot determined by that index.
1056              * This is useful because ARB_separate_shader_objects uses location
1057              * qualifiers for linkage, and if the semantic index corresponds to
1058              * these locations, linkage passes in the driver become unecessary.
1059              *
1060              * If needs_texcoord_semantic is true, no semantic indices will be
1061              * consumed for the TEXi varyings, and we can base the locations of
1062              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
1063              */
1064             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
1065                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
1066             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
1067             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
1068             if (attr == VARYING_SLOT_PNTC)
1069                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1070             else {
1071                interpMode[slot] = stfp->glsl_to_tgsi ?
1072                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1073             }
1074             break;
1075          }
1076       }
1077       else {
1078          inputMapping[attr] = -1;
1079       }
1080    }
1081
1082    /*
1083     * Semantics and mapping for outputs
1084     */
1085    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
1086
1087    /* if z is written, emit that first */
1088    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
1089       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
1090       fs_output_semantic_index[fs_num_outputs] = 0;
1091       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
1092       fs_num_outputs++;
1093       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
1094    }
1095
1096    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
1097       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
1098       fs_output_semantic_index[fs_num_outputs] = 0;
1099       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
1100       fs_num_outputs++;
1101       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
1102    }
1103
1104    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
1105       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
1106       fs_output_semantic_index[fs_num_outputs] = 0;
1107       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
1108       fs_num_outputs++;
1109       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
1110    }
1111
1112    /* handle remaining outputs (color) */
1113    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
1114       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
1115          stfp->Base.SecondaryOutputsWritten;
1116       const unsigned loc = attr % FRAG_RESULT_MAX;
1117
1118       if (written & BITFIELD64_BIT(loc)) {
1119          switch (loc) {
1120          case FRAG_RESULT_DEPTH:
1121          case FRAG_RESULT_STENCIL:
1122          case FRAG_RESULT_SAMPLE_MASK:
1123             /* handled above */
1124             assert(0);
1125             break;
1126          case FRAG_RESULT_COLOR:
1127             write_all = GL_TRUE; /* fallthrough */
1128          default: {
1129             int index;
1130             assert(loc == FRAG_RESULT_COLOR ||
1131                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1132
1133             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1134
1135             if (attr >= FRAG_RESULT_MAX) {
1136                /* Secondary color for dual source blending. */
1137                assert(index == 0);
1138                index++;
1139             }
1140
1141             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1142             fs_output_semantic_index[fs_num_outputs] = index;
1143             outputMapping[attr] = fs_num_outputs;
1144             break;
1145          }
1146          }
1147
1148          fs_num_outputs++;
1149       }
1150    }
1151
1152    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1153    if (ureg == NULL)
1154       return false;
1155
1156    if (ST_DEBUG & DEBUG_MESA) {
1157       _mesa_print_program(&stfp->Base);
1158       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1159       debug_printf("\n");
1160    }
1161    if (write_all == GL_TRUE)
1162       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1163
1164    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1165       switch (stfp->Base.info.fs.depth_layout) {
1166       case FRAG_DEPTH_LAYOUT_ANY:
1167          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1168                        TGSI_FS_DEPTH_LAYOUT_ANY);
1169          break;
1170       case FRAG_DEPTH_LAYOUT_GREATER:
1171          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1172                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1173          break;
1174       case FRAG_DEPTH_LAYOUT_LESS:
1175          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1176                        TGSI_FS_DEPTH_LAYOUT_LESS);
1177          break;
1178       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1179          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1180                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1181          break;
1182       default:
1183          assert(0);
1184       }
1185    }
1186
1187    if (stfp->glsl_to_tgsi) {
1188       st_translate_program(st->ctx,
1189                            PIPE_SHADER_FRAGMENT,
1190                            ureg,
1191                            stfp->glsl_to_tgsi,
1192                            &stfp->Base,
1193                            /* inputs */
1194                            fs_num_inputs,
1195                            inputMapping,
1196                            inputSlotToAttr,
1197                            input_semantic_name,
1198                            input_semantic_index,
1199                            interpMode,
1200                            /* outputs */
1201                            fs_num_outputs,
1202                            outputMapping,
1203                            fs_output_semantic_name,
1204                            fs_output_semantic_index);
1205
1206       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1207    } else if (stfp->ati_fs)
1208       st_translate_atifs_program(ureg,
1209                                  stfp->ati_fs,
1210                                  &stfp->Base,
1211                                  /* inputs */
1212                                  fs_num_inputs,
1213                                  inputMapping,
1214                                  input_semantic_name,
1215                                  input_semantic_index,
1216                                  interpMode,
1217                                  /* outputs */
1218                                  fs_num_outputs,
1219                                  outputMapping,
1220                                  fs_output_semantic_name,
1221                                  fs_output_semantic_index);
1222    else
1223       st_translate_mesa_program(st->ctx,
1224                                 PIPE_SHADER_FRAGMENT,
1225                                 ureg,
1226                                 &stfp->Base,
1227                                 /* inputs */
1228                                 fs_num_inputs,
1229                                 inputMapping,
1230                                 input_semantic_name,
1231                                 input_semantic_index,
1232                                 interpMode,
1233                                 /* outputs */
1234                                 fs_num_outputs,
1235                                 outputMapping,
1236                                 fs_output_semantic_name,
1237                                 fs_output_semantic_index);
1238
1239    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1240    ureg_destroy(ureg);
1241
1242    if (stfp->glsl_to_tgsi) {
1243       stfp->glsl_to_tgsi = NULL;
1244       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1245    }
1246
1247    return stfp->state.tokens != NULL;
1248 }
1249
1250 static struct st_fp_variant *
1251 st_create_fp_variant(struct st_context *st,
1252                      struct st_program *stfp,
1253                      const struct st_fp_variant_key *key)
1254 {
1255    struct pipe_context *pipe = st->pipe;
1256    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1257    struct pipe_shader_state state = {0};
1258    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1259    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1260       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1261    static const gl_state_index16 scale_state[STATE_LENGTH] =
1262       { STATE_INTERNAL, STATE_PT_SCALE };
1263    static const gl_state_index16 bias_state[STATE_LENGTH] =
1264       { STATE_INTERNAL, STATE_PT_BIAS };
1265    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1266       { STATE_INTERNAL, STATE_ALPHA_REF };
1267
1268    if (!variant)
1269       return NULL;
1270
1271    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1272       bool finalize = false;
1273
1274       state.type = PIPE_SHADER_IR_NIR;
1275       state.ir.nir = get_nir_shader(st, stfp);
1276
1277       if (key->clamp_color) {
1278          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1279          finalize = true;
1280       }
1281
1282       if (key->lower_flatshade) {
1283          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1284          finalize = true;
1285       }
1286
1287       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1288          _mesa_add_state_reference(params, alpha_ref_state);
1289          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1290                     false, alpha_ref_state);
1291          finalize = true;
1292       }
1293
1294       if (key->lower_two_sided_color) {
1295          bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal;
1296          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color, face_sysval);
1297          finalize = true;
1298       }
1299
1300       if (key->persample_shading) {
1301           nir_shader *shader = state.ir.nir;
1302           nir_foreach_shader_in_variable(var, shader)
1303              var->data.sample = true;
1304           finalize = true;
1305       }
1306
1307       assert(!(key->bitmap && key->drawpixels));
1308
1309       /* glBitmap */
1310       if (key->bitmap) {
1311          nir_lower_bitmap_options options = {0};
1312
1313          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1314          options.sampler = variant->bitmap_sampler;
1315          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1316
1317          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1318          finalize = true;
1319       }
1320
1321       /* glDrawPixels (color only) */
1322       if (key->drawpixels) {
1323          nir_lower_drawpixels_options options = {{0}};
1324          unsigned samplers_used = stfp->Base.SamplersUsed;
1325
1326          /* Find the first unused slot. */
1327          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1328          options.drawpix_sampler = variant->drawpix_sampler;
1329          samplers_used |= (1 << variant->drawpix_sampler);
1330
1331          options.pixel_maps = key->pixelMaps;
1332          if (key->pixelMaps) {
1333             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1334             options.pixelmap_sampler = variant->pixelmap_sampler;
1335          }
1336
1337          options.scale_and_bias = key->scaleAndBias;
1338          if (key->scaleAndBias) {
1339             _mesa_add_state_reference(params, scale_state);
1340             memcpy(options.scale_state_tokens, scale_state,
1341                    sizeof(options.scale_state_tokens));
1342             _mesa_add_state_reference(params, bias_state);
1343             memcpy(options.bias_state_tokens, bias_state,
1344                    sizeof(options.bias_state_tokens));
1345          }
1346
1347          _mesa_add_state_reference(params, texcoord_state);
1348          memcpy(options.texcoord_state_tokens, texcoord_state,
1349                 sizeof(options.texcoord_state_tokens));
1350
1351          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1352          finalize = true;
1353       }
1354
1355       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1356                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1357                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1358
1359          st_nir_lower_samplers(pipe->screen, state.ir.nir,
1360                                stfp->shader_program, &stfp->Base);
1361
1362          nir_lower_tex_options options = {0};
1363          options.lower_y_uv_external = key->external.lower_nv12;
1364          options.lower_y_u_v_external = key->external.lower_iyuv;
1365          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1366          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1367          options.lower_ayuv_external = key->external.lower_ayuv;
1368          options.lower_xyuv_external = key->external.lower_xyuv;
1369          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1370          finalize = true;
1371       }
1372
1373       if (finalize || !st->allow_st_finalize_nir_twice) {
1374          st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
1375                          false);
1376       }
1377
1378       /* This pass needs to happen *after* nir_lower_sampler */
1379       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1380                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1381                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1382          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1383                     ~stfp->Base.SamplersUsed,
1384                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1385                        key->external.lower_yx_xuxv,
1386                     key->external.lower_iyuv);
1387          finalize = true;
1388       }
1389
1390       if (finalize || !st->allow_st_finalize_nir_twice) {
1391          /* Some of the lowering above may have introduced new varyings */
1392          nir_shader_gather_info(state.ir.nir,
1393                                 nir_shader_get_entrypoint(state.ir.nir));
1394
1395          struct pipe_screen *screen = pipe->screen;
1396          if (screen->finalize_nir)
1397             screen->finalize_nir(screen, state.ir.nir, false);
1398       }
1399
1400       if (ST_DEBUG & DEBUG_PRINT_IR)
1401          nir_print_shader(state.ir.nir, stderr);
1402
1403       variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1404       variant->key = *key;
1405
1406       return variant;
1407    }
1408
1409    state.tokens = stfp->state.tokens;
1410
1411    assert(!(key->bitmap && key->drawpixels));
1412
1413    /* Fix texture targets and add fog for ATI_fs */
1414    if (stfp->ati_fs) {
1415       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1416
1417       if (tokens)
1418          state.tokens = tokens;
1419       else
1420          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1421    }
1422
1423    /* Emulate features. */
1424    if (key->clamp_color || key->persample_shading) {
1425       const struct tgsi_token *tokens;
1426       unsigned flags =
1427          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1428          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1429
1430       tokens = tgsi_emulate(state.tokens, flags);
1431
1432       if (tokens) {
1433          if (state.tokens != stfp->state.tokens)
1434             tgsi_free_tokens(state.tokens);
1435          state.tokens = tokens;
1436       } else
1437          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1438    }
1439
1440    /* glBitmap */
1441    if (key->bitmap) {
1442       const struct tgsi_token *tokens;
1443
1444       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1445
1446       tokens = st_get_bitmap_shader(state.tokens,
1447                                     st->internal_target,
1448                                     variant->bitmap_sampler,
1449                                     st->needs_texcoord_semantic,
1450                                     st->bitmap.tex_format ==
1451                                     PIPE_FORMAT_R8_UNORM);
1452
1453       if (tokens) {
1454          if (state.tokens != stfp->state.tokens)
1455             tgsi_free_tokens(state.tokens);
1456          state.tokens = tokens;
1457       } else
1458          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1459    }
1460
1461    /* glDrawPixels (color only) */
1462    if (key->drawpixels) {
1463       const struct tgsi_token *tokens;
1464       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1465
1466       /* Find the first unused slot. */
1467       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1468
1469       if (key->pixelMaps) {
1470          unsigned samplers_used = stfp->Base.SamplersUsed |
1471                                   (1 << variant->drawpix_sampler);
1472
1473          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1474       }
1475
1476       if (key->scaleAndBias) {
1477          scale_const = _mesa_add_state_reference(params, scale_state);
1478          bias_const = _mesa_add_state_reference(params, bias_state);
1479       }
1480
1481       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1482
1483       tokens = st_get_drawpix_shader(state.tokens,
1484                                      st->needs_texcoord_semantic,
1485                                      key->scaleAndBias, scale_const,
1486                                      bias_const, key->pixelMaps,
1487                                      variant->drawpix_sampler,
1488                                      variant->pixelmap_sampler,
1489                                      texcoord_const, st->internal_target);
1490
1491       if (tokens) {
1492          if (state.tokens != stfp->state.tokens)
1493             tgsi_free_tokens(state.tokens);
1494          state.tokens = tokens;
1495       } else
1496          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1497    }
1498
1499    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1500                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1501       const struct tgsi_token *tokens;
1502
1503       /* samplers inserted would conflict, but this should be unpossible: */
1504       assert(!(key->bitmap || key->drawpixels));
1505
1506       tokens = st_tgsi_lower_yuv(state.tokens,
1507                                  ~stfp->Base.SamplersUsed,
1508                                  key->external.lower_nv12 ||
1509                                     key->external.lower_xy_uxvx ||
1510                                     key->external.lower_yx_xuxv,
1511                                  key->external.lower_iyuv);
1512       if (tokens) {
1513          if (state.tokens != stfp->state.tokens)
1514             tgsi_free_tokens(state.tokens);
1515          state.tokens = tokens;
1516       } else {
1517          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1518       }
1519    }
1520
1521    if (key->lower_depth_clamp) {
1522       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1523
1524       const struct tgsi_token *tokens;
1525       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1526       if (state.tokens != stfp->state.tokens)
1527          tgsi_free_tokens(state.tokens);
1528       state.tokens = tokens;
1529    }
1530
1531    if (ST_DEBUG & DEBUG_PRINT_IR)
1532       tgsi_dump(state.tokens, 0);
1533
1534    /* fill in variant */
1535    variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1536    variant->key = *key;
1537
1538    if (state.tokens != stfp->state.tokens)
1539       tgsi_free_tokens(state.tokens);
1540    return variant;
1541 }
1542
1543 /**
1544  * Translate fragment program if needed.
1545  */
1546 struct st_fp_variant *
1547 st_get_fp_variant(struct st_context *st,
1548                   struct st_program *stfp,
1549                   const struct st_fp_variant_key *key)
1550 {
1551    struct st_fp_variant *fpv;
1552
1553    /* Search for existing variant */
1554    for (fpv = st_fp_variant(stfp->variants); fpv;
1555         fpv = st_fp_variant(fpv->base.next)) {
1556       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1557          break;
1558       }
1559    }
1560
1561    if (!fpv) {
1562       /* create new */
1563       fpv = st_create_fp_variant(st, stfp, key);
1564       if (fpv) {
1565          fpv->base.st = key->st;
1566
1567          if (key->bitmap || key->drawpixels) {
1568             /* Regular variants should always come before the
1569              * bitmap & drawpixels variants, (unless there
1570              * are no regular variants) so that
1571              * st_update_fp can take a fast path when
1572              * shader_has_one_variant is set.
1573              */
1574             if (!stfp->variants) {
1575                stfp->variants = &fpv->base;
1576             } else {
1577                /* insert into list after the first one */
1578                fpv->base.next = stfp->variants->next;
1579                stfp->variants->next = &fpv->base;
1580             }
1581          } else {
1582             /* insert into list */
1583             fpv->base.next = stfp->variants;
1584             stfp->variants = &fpv->base;
1585          }
1586       }
1587    }
1588
1589    return fpv;
1590 }
1591
1592 /**
1593  * Translate a program. This is common code for geometry and tessellation
1594  * shaders.
1595  */
1596 bool
1597 st_translate_common_program(struct st_context *st,
1598                             struct st_program *stp)
1599 {
1600    struct gl_program *prog = &stp->Base;
1601    enum pipe_shader_type stage =
1602       pipe_shader_type_from_mesa(stp->Base.info.stage);
1603    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1604
1605    if (ureg == NULL)
1606       return false;
1607
1608    switch (stage) {
1609    case PIPE_SHADER_TESS_CTRL:
1610       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1611                     stp->Base.info.tess.tcs_vertices_out);
1612       break;
1613
1614    case PIPE_SHADER_TESS_EVAL:
1615       if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
1616          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1617       else
1618          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1619                        stp->Base.info.tess.primitive_mode);
1620
1621       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1622       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1623                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1624       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1625                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1626
1627       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1628                     (stp->Base.info.tess.spacing + 1) % 3);
1629
1630       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1631                     !stp->Base.info.tess.ccw);
1632       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1633                     stp->Base.info.tess.point_mode);
1634       break;
1635
1636    case PIPE_SHADER_GEOMETRY:
1637       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1638                     stp->Base.info.gs.input_primitive);
1639       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1640                     stp->Base.info.gs.output_primitive);
1641       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1642                     stp->Base.info.gs.vertices_out);
1643       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1644                     stp->Base.info.gs.invocations);
1645       break;
1646
1647    default:
1648       break;
1649    }
1650
1651    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1652    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1653    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1654    GLuint attr;
1655
1656    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1657    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1658    uint num_inputs = 0;
1659
1660    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1661    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1662    uint num_outputs = 0;
1663
1664    GLint i;
1665
1666    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1667    memset(inputMapping, 0, sizeof(inputMapping));
1668    memset(outputMapping, 0, sizeof(outputMapping));
1669    memset(&stp->state, 0, sizeof(stp->state));
1670
1671    if (prog->info.clip_distance_array_size)
1672       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1673                     prog->info.clip_distance_array_size);
1674    if (prog->info.cull_distance_array_size)
1675       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1676                     prog->info.cull_distance_array_size);
1677
1678    /*
1679     * Convert Mesa program inputs to TGSI input register semantics.
1680     */
1681    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1682       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1683          continue;
1684
1685       unsigned slot = num_inputs++;
1686
1687       inputMapping[attr] = slot;
1688       inputSlotToAttr[slot] = attr;
1689
1690       unsigned semantic_name, semantic_index;
1691       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1692                                    &semantic_name, &semantic_index);
1693       input_semantic_name[slot] = semantic_name;
1694       input_semantic_index[slot] = semantic_index;
1695    }
1696
1697    /* Also add patch inputs. */
1698    for (attr = 0; attr < 32; attr++) {
1699       if (prog->info.patch_inputs_read & (1u << attr)) {
1700          GLuint slot = num_inputs++;
1701          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1702
1703          inputMapping[patch_attr] = slot;
1704          inputSlotToAttr[slot] = patch_attr;
1705          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1706          input_semantic_index[slot] = attr;
1707       }
1708    }
1709
1710    /* initialize output semantics to defaults */
1711    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1712       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1713       output_semantic_index[i] = 0;
1714    }
1715
1716    /*
1717     * Determine number of outputs, the (default) output register
1718     * mapping and the semantic information for each output.
1719     */
1720    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1721       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1722          GLuint slot = num_outputs++;
1723
1724          outputMapping[attr] = slot;
1725
1726          unsigned semantic_name, semantic_index;
1727          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1728                                       &semantic_name, &semantic_index);
1729          output_semantic_name[slot] = semantic_name;
1730          output_semantic_index[slot] = semantic_index;
1731       }
1732    }
1733
1734    /* Also add patch outputs. */
1735    for (attr = 0; attr < 32; attr++) {
1736       if (prog->info.patch_outputs_written & (1u << attr)) {
1737          GLuint slot = num_outputs++;
1738          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1739
1740          outputMapping[patch_attr] = slot;
1741          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1742          output_semantic_index[slot] = attr;
1743       }
1744    }
1745
1746    st_translate_program(st->ctx,
1747                         stage,
1748                         ureg,
1749                         stp->glsl_to_tgsi,
1750                         prog,
1751                         /* inputs */
1752                         num_inputs,
1753                         inputMapping,
1754                         inputSlotToAttr,
1755                         input_semantic_name,
1756                         input_semantic_index,
1757                         NULL,
1758                         /* outputs */
1759                         num_outputs,
1760                         outputMapping,
1761                         output_semantic_name,
1762                         output_semantic_index);
1763
1764    stp->state.tokens = ureg_get_tokens(ureg, NULL);
1765
1766    ureg_destroy(ureg);
1767
1768    st_translate_stream_output_info(prog);
1769
1770    st_store_ir_in_disk_cache(st, prog, false);
1771
1772    if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1773       _mesa_print_program(prog);
1774
1775    free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1776    stp->glsl_to_tgsi = NULL;
1777    return true;
1778 }
1779
1780
1781 /**
1782  * Get/create a basic program variant.
1783  */
1784 struct st_variant *
1785 st_get_common_variant(struct st_context *st,
1786                       struct st_program *prog,
1787                       const struct st_common_variant_key *key)
1788 {
1789    struct pipe_context *pipe = st->pipe;
1790    struct st_variant *v;
1791    struct pipe_shader_state state = {0};
1792    struct gl_program_parameter_list *params = prog->Base.Parameters;
1793
1794    /* Search for existing variant */
1795    for (v = prog->variants; v; v = v->next) {
1796       if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
1797          break;
1798    }
1799
1800    if (!v) {
1801       /* create new */
1802       v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
1803       if (v) {
1804          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1805             bool finalize = false;
1806
1807             state.type = PIPE_SHADER_IR_NIR;
1808             state.ir.nir = get_nir_shader(st, prog);
1809
1810             if (key->clamp_color) {
1811                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1812                finalize = true;
1813             }
1814
1815             if (key->lower_ucp) {
1816                lower_ucp(st, state.ir.nir, key->lower_ucp, params);
1817                finalize = true;
1818             }
1819
1820             state.stream_output = prog->state.stream_output;
1821
1822             if (finalize || !st->allow_st_finalize_nir_twice) {
1823                st_finalize_nir(st, &prog->Base, prog->shader_program,
1824                                state.ir.nir, true);
1825             }
1826
1827             if (ST_DEBUG & DEBUG_PRINT_IR)
1828                nir_print_shader(state.ir.nir, stderr);
1829          } else {
1830             if (key->lower_depth_clamp) {
1831                struct gl_program_parameter_list *params = prog->Base.Parameters;
1832
1833                unsigned depth_range_const =
1834                      _mesa_add_state_reference(params, depth_range_state);
1835
1836                const struct tgsi_token *tokens;
1837                tokens =
1838                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1839                                                depth_range_const,
1840                                                key->clip_negative_one_to_one);
1841
1842                if (tokens != prog->state.tokens)
1843                   tgsi_free_tokens(prog->state.tokens);
1844
1845                prog->state.tokens = tokens;
1846             }
1847             state = prog->state;
1848
1849             if (ST_DEBUG & DEBUG_PRINT_IR)
1850                tgsi_dump(state.tokens, 0);
1851          }
1852          /* fill in new variant */
1853          switch (prog->Base.info.stage) {
1854          case MESA_SHADER_TESS_CTRL:
1855             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1856             break;
1857          case MESA_SHADER_TESS_EVAL:
1858             v->driver_shader = pipe->create_tes_state(pipe, &state);
1859             break;
1860          case MESA_SHADER_GEOMETRY:
1861             v->driver_shader = pipe->create_gs_state(pipe, &state);
1862             break;
1863          case MESA_SHADER_COMPUTE: {
1864             struct pipe_compute_state cs = {0};
1865             cs.ir_type = state.type;
1866             cs.req_local_mem = prog->Base.info.cs.shared_size;
1867
1868             if (state.type == PIPE_SHADER_IR_NIR)
1869                cs.prog = state.ir.nir;
1870             else
1871                cs.prog = state.tokens;
1872
1873             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1874             break;
1875          }
1876          default:
1877             assert(!"unhandled shader type");
1878             free(v);
1879             return NULL;
1880          }
1881
1882          st_common_variant(v)->key = *key;
1883          v->st = key->st;
1884
1885          /* insert into list */
1886          v->next = prog->variants;
1887          prog->variants = v;
1888       }
1889    }
1890
1891    return v;
1892 }
1893
1894
1895 /**
1896  * Vert/Geom/Frag programs have per-context variants.  Free all the
1897  * variants attached to the given program which match the given context.
1898  */
1899 static void
1900 destroy_program_variants(struct st_context *st, struct gl_program *target)
1901 {
1902    if (!target || target == &_mesa_DummyProgram)
1903       return;
1904
1905    struct st_program *p = st_program(target);
1906    struct st_variant *v, **prevPtr = &p->variants;
1907    bool unbound = false;
1908
1909    for (v = p->variants; v; ) {
1910       struct st_variant *next = v->next;
1911       if (v->st == st) {
1912          if (!unbound) {
1913             st_unbind_program(st, p);
1914             unbound = true;
1915          }
1916
1917          /* unlink from list */
1918          *prevPtr = next;
1919          /* destroy this variant */
1920          delete_variant(st, v, target->Target);
1921       }
1922       else {
1923          prevPtr = &v->next;
1924       }
1925       v = next;
1926    }
1927 }
1928
1929
1930 /**
1931  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1932  * which match the given context.
1933  */
1934 static void
1935 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1936 {
1937    struct st_context *st = (struct st_context *) userData;
1938    struct gl_shader *shader = (struct gl_shader *) data;
1939
1940    switch (shader->Type) {
1941    case GL_SHADER_PROGRAM_MESA:
1942       {
1943          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1944          GLuint i;
1945
1946          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1947             if (shProg->_LinkedShaders[i])
1948                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1949          }
1950       }
1951       break;
1952    case GL_VERTEX_SHADER:
1953    case GL_FRAGMENT_SHADER:
1954    case GL_GEOMETRY_SHADER:
1955    case GL_TESS_CONTROL_SHADER:
1956    case GL_TESS_EVALUATION_SHADER:
1957    case GL_COMPUTE_SHADER:
1958       break;
1959    default:
1960       assert(0);
1961    }
1962 }
1963
1964
1965 /**
1966  * Callback for _mesa_HashWalk.  Free all the program variants which match
1967  * the given context.
1968  */
1969 static void
1970 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1971 {
1972    struct st_context *st = (struct st_context *) userData;
1973    struct gl_program *program = (struct gl_program *) data;
1974    destroy_program_variants(st, program);
1975 }
1976
1977
1978 /**
1979  * Walk over all shaders and programs to delete any variants which
1980  * belong to the given context.
1981  * This is called during context tear-down.
1982  */
1983 void
1984 st_destroy_program_variants(struct st_context *st)
1985 {
1986    /* If shaders can be shared with other contexts, the last context will
1987     * call DeleteProgram on all shaders, releasing everything.
1988     */
1989    if (st->has_shareable_shaders)
1990       return;
1991
1992    /* ARB vert/frag program */
1993    _mesa_HashWalk(st->ctx->Shared->Programs,
1994                   destroy_program_variants_cb, st);
1995
1996    /* GLSL vert/frag/geom shaders */
1997    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
1998                   destroy_shader_program_variants_cb, st);
1999 }
2000
2001
2002 /**
2003  * Compile one shader variant.
2004  */
2005 static void
2006 st_precompile_shader_variant(struct st_context *st,
2007                              struct gl_program *prog)
2008 {
2009    switch (prog->Target) {
2010    case GL_VERTEX_PROGRAM_ARB: {
2011       struct st_program *p = (struct st_program *)prog;
2012       struct st_common_variant_key key;
2013
2014       memset(&key, 0, sizeof(key));
2015
2016       key.st = st->has_shareable_shaders ? NULL : st;
2017       st_get_vp_variant(st, p, &key);
2018       break;
2019    }
2020
2021    case GL_FRAGMENT_PROGRAM_ARB: {
2022       struct st_program *p = (struct st_program *)prog;
2023       struct st_fp_variant_key key;
2024
2025       memset(&key, 0, sizeof(key));
2026
2027       key.st = st->has_shareable_shaders ? NULL : st;
2028       st_get_fp_variant(st, p, &key);
2029       break;
2030    }
2031
2032    case GL_TESS_CONTROL_PROGRAM_NV:
2033    case GL_TESS_EVALUATION_PROGRAM_NV:
2034    case GL_GEOMETRY_PROGRAM_NV:
2035    case GL_COMPUTE_PROGRAM_NV: {
2036       struct st_program *p = st_program(prog);
2037       struct st_common_variant_key key;
2038
2039       memset(&key, 0, sizeof(key));
2040
2041       key.st = st->has_shareable_shaders ? NULL : st;
2042       st_get_common_variant(st, p, &key);
2043       break;
2044    }
2045
2046    default:
2047       assert(0);
2048    }
2049 }
2050
2051 void
2052 st_serialize_nir(struct st_program *stp)
2053 {
2054    if (!stp->serialized_nir) {
2055       struct blob blob;
2056       size_t size;
2057
2058       blob_init(&blob);
2059       nir_serialize(&blob, stp->Base.nir, false);
2060       blob_finish_get_buffer(&blob, &stp->serialized_nir, &size);
2061       stp->serialized_nir_size = size;
2062    }
2063 }
2064
2065 void
2066 st_finalize_program(struct st_context *st, struct gl_program *prog)
2067 {
2068    if (st->current_program[prog->info.stage] == prog) {
2069       if (prog->info.stage == MESA_SHADER_VERTEX)
2070          st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
2071       else
2072          st->dirty |= ((struct st_program *)prog)->affected_states;
2073    }
2074
2075    if (prog->nir) {
2076       nir_sweep(prog->nir);
2077
2078       /* This is only needed for ARB_vp/fp programs and when the disk cache
2079        * is disabled. If the disk cache is enabled, GLSL programs are
2080        * serialized in write_nir_to_cache.
2081        */
2082       st_serialize_nir(st_program(prog));
2083    }
2084
2085    /* Create Gallium shaders now instead of on demand. */
2086    if (ST_DEBUG & DEBUG_PRECOMPILE ||
2087        st->shader_has_one_variant[prog->info.stage])
2088       st_precompile_shader_variant(st, prog);
2089 }