src/mesa/state_tracker/st_program.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keithw@vmware.com>
  30   *   Brian Paul
  31   */
  32
  33
  34 #include "main/errors.h"
  35
  36 #include "main/hash.h"
  37 #include "main/mtypes.h"
  38 #include "program/prog_parameter.h"
  39 #include "program/prog_print.h"
  40 #include "program/prog_to_nir.h"
  41 #include "program/programopt.h"
  42
  43 #include "compiler/nir/nir.h"
  44 #include "compiler/nir/nir_serialize.h"
  45 #include "draw/draw_context.h"
  46
  47 #include "pipe/p_context.h"
  48 #include "pipe/p_defines.h"
  49 #include "pipe/p_shader_tokens.h"
  50 #include "draw/draw_context.h"
  51 #include "tgsi/tgsi_dump.h"
  52 #include "tgsi/tgsi_emulate.h"
  53 #include "tgsi/tgsi_parse.h"
  54 #include "tgsi/tgsi_ureg.h"
  55
  56 #include "util/u_memory.h"
  57
  58 #include "st_debug.h"
  59 #include "st_cb_bitmap.h"
  60 #include "st_cb_drawpixels.h"
  61 #include "st_context.h"
  62 #include "st_tgsi_lower_depth_clamp.h"
  63 #include "st_tgsi_lower_yuv.h"
  64 #include "st_program.h"
  65 #include "st_mesa_to_tgsi.h"
  66 #include "st_atifs_to_tgsi.h"
  67 #include "st_nir.h"
  68 #include "st_shader_cache.h"
  69 #include "st_util.h"
  70 #include "cso_cache/cso_context.h"
  71
  72
/* Forward declaration so that st_release_program() below can call this
 * helper before its definition appears in the file.
 */
static void
destroy_program_variants(struct st_context *st, struct gl_program *target);
  75
  76 static void
  77 set_affected_state_flags(uint64_t *states,
  78                          struct gl_program *prog,
  79                          uint64_t new_constants,
  80                          uint64_t new_sampler_views,
  81                          uint64_t new_samplers,
  82                          uint64_t new_images,
  83                          uint64_t new_ubos,
  84                          uint64_t new_ssbos,
  85                          uint64_t new_atomics)
  86 {
  87    if (prog->Parameters->NumParameters)
  88       *states |= new_constants;
  89
  90    if (prog->info.num_textures)
  91       *states |= new_sampler_views | new_samplers;
  92
  93    if (prog->info.num_images)
  94       *states |= new_images;
  95
  96    if (prog->info.num_ubos)
  97       *states |= new_ubos;
  98
  99    if (prog->info.num_ssbos)
 100       *states |= new_ssbos;
 101
 102    if (prog->info.num_abos)
 103       *states |= new_atomics;
 104 }
 105
 106 /**
 107  * This determines which states will be updated when the shader is bound.
 108  */
 109 void
 110 st_set_prog_affected_state_flags(struct gl_program *prog)
 111 {
 112    uint64_t *states;
 113
 114    switch (prog->info.stage) {
 115    case MESA_SHADER_VERTEX:
 116       states = &((struct st_program*)prog)->affected_states;
 117
 118       *states = ST_NEW_VS_STATE |
 119                 ST_NEW_RASTERIZER |
 120                 ST_NEW_VERTEX_ARRAYS;
 121
 122       set_affected_state_flags(states, prog,
 123                                ST_NEW_VS_CONSTANTS,
 124                                ST_NEW_VS_SAMPLER_VIEWS,
 125                                ST_NEW_VS_SAMPLERS,
 126                                ST_NEW_VS_IMAGES,
 127                                ST_NEW_VS_UBOS,
 128                                ST_NEW_VS_SSBOS,
 129                                ST_NEW_VS_ATOMICS);
 130       break;
 131
 132    case MESA_SHADER_TESS_CTRL:
 133       states = &(st_program(prog))->affected_states;
 134
 135       *states = ST_NEW_TCS_STATE;
 136
 137       set_affected_state_flags(states, prog,
 138                                ST_NEW_TCS_CONSTANTS,
 139                                ST_NEW_TCS_SAMPLER_VIEWS,
 140                                ST_NEW_TCS_SAMPLERS,
 141                                ST_NEW_TCS_IMAGES,
 142                                ST_NEW_TCS_UBOS,
 143                                ST_NEW_TCS_SSBOS,
 144                                ST_NEW_TCS_ATOMICS);
 145       break;
 146
 147    case MESA_SHADER_TESS_EVAL:
 148       states = &(st_program(prog))->affected_states;
 149
 150       *states = ST_NEW_TES_STATE |
 151                 ST_NEW_RASTERIZER;
 152
 153       set_affected_state_flags(states, prog,
 154                                ST_NEW_TES_CONSTANTS,
 155                                ST_NEW_TES_SAMPLER_VIEWS,
 156                                ST_NEW_TES_SAMPLERS,
 157                                ST_NEW_TES_IMAGES,
 158                                ST_NEW_TES_UBOS,
 159                                ST_NEW_TES_SSBOS,
 160                                ST_NEW_TES_ATOMICS);
 161       break;
 162
 163    case MESA_SHADER_GEOMETRY:
 164       states = &(st_program(prog))->affected_states;
 165
 166       *states = ST_NEW_GS_STATE |
 167                 ST_NEW_RASTERIZER;
 168
 169       set_affected_state_flags(states, prog,
 170                                ST_NEW_GS_CONSTANTS,
 171                                ST_NEW_GS_SAMPLER_VIEWS,
 172                                ST_NEW_GS_SAMPLERS,
 173                                ST_NEW_GS_IMAGES,
 174                                ST_NEW_GS_UBOS,
 175                                ST_NEW_GS_SSBOS,
 176                                ST_NEW_GS_ATOMICS);
 177       break;
 178
 179    case MESA_SHADER_FRAGMENT:
 180       states = &((struct st_program*)prog)->affected_states;
 181
 182       /* gl_FragCoord and glDrawPixels always use constants. */
 183       *states = ST_NEW_FS_STATE |
 184                 ST_NEW_SAMPLE_SHADING |
 185                 ST_NEW_FS_CONSTANTS;
 186
 187       set_affected_state_flags(states, prog,
 188                                ST_NEW_FS_CONSTANTS,
 189                                ST_NEW_FS_SAMPLER_VIEWS,
 190                                ST_NEW_FS_SAMPLERS,
 191                                ST_NEW_FS_IMAGES,
 192                                ST_NEW_FS_UBOS,
 193                                ST_NEW_FS_SSBOS,
 194                                ST_NEW_FS_ATOMICS);
 195       break;
 196
 197    case MESA_SHADER_COMPUTE:
 198       states = &((struct st_program*)prog)->affected_states;
 199
 200       *states = ST_NEW_CS_STATE;
 201
 202       set_affected_state_flags(states, prog,
 203                                ST_NEW_CS_CONSTANTS,
 204                                ST_NEW_CS_SAMPLER_VIEWS,
 205                                ST_NEW_CS_SAMPLERS,
 206                                ST_NEW_CS_IMAGES,
 207                                ST_NEW_CS_UBOS,
 208                                ST_NEW_CS_SSBOS,
 209                                ST_NEW_CS_ATOMICS);
 210       break;
 211
 212    default:
 213       unreachable("unhandled shader stage");
 214    }
 215 }
 216
 217
 218 /**
 219  * Delete a shader variant.  Note the caller must unlink the variant from
 220  * the linked list.
 221  */
 222 static void
 223 delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
 224 {
 225    if (v->driver_shader) {
 226       if (target == GL_VERTEX_PROGRAM_ARB &&
 227           ((struct st_common_variant*)v)->key.is_draw_shader) {
 228          /* Draw shader. */
 229          draw_delete_vertex_shader(st->draw, v->driver_shader);
 230       } else if (st->has_shareable_shaders || v->st == st) {
 231          /* The shader's context matches the calling context, or we
 232           * don't care.
 233           */
 234          switch (target) {
 235          case GL_VERTEX_PROGRAM_ARB:
 236             st->pipe->delete_vs_state(st->pipe, v->driver_shader);
 237             break;
 238          case GL_TESS_CONTROL_PROGRAM_NV:
 239             st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
 240             break;
 241          case GL_TESS_EVALUATION_PROGRAM_NV:
 242             st->pipe->delete_tes_state(st->pipe, v->driver_shader);
 243             break;
 244          case GL_GEOMETRY_PROGRAM_NV:
 245             st->pipe->delete_gs_state(st->pipe, v->driver_shader);
 246             break;
 247          case GL_FRAGMENT_PROGRAM_ARB:
 248             st->pipe->delete_fs_state(st->pipe, v->driver_shader);
 249             break;
 250          case GL_COMPUTE_PROGRAM_NV:
 251             st->pipe->delete_compute_state(st->pipe, v->driver_shader);
 252             break;
 253          default:
 254             unreachable("bad shader type in delete_basic_variant");
 255          }
 256       } else {
 257          /* We can't delete a shader with a context different from the one
 258           * that created it.  Add it to the creating context's zombie list.
 259           */
 260          enum pipe_shader_type type =
 261             pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));
 262
 263          st_save_zombie_shader(v->st, type, v->driver_shader);
 264       }
 265    }
 266
 267    free(v);
 268 }
 269
 270 static void
 271 st_unbind_program(struct st_context *st, struct st_program *p)
 272 {
 273    /* Unbind the shader in cso_context and re-bind in st/mesa. */
 274    switch (p->Base.info.stage) {
 275    case MESA_SHADER_VERTEX:
 276       cso_set_vertex_shader_handle(st->cso_context, NULL);
 277       st->dirty |= ST_NEW_VS_STATE;
 278       break;
 279    case MESA_SHADER_TESS_CTRL:
 280       cso_set_tessctrl_shader_handle(st->cso_context, NULL);
 281       st->dirty |= ST_NEW_TCS_STATE;
 282       break;
 283    case MESA_SHADER_TESS_EVAL:
 284       cso_set_tesseval_shader_handle(st->cso_context, NULL);
 285       st->dirty |= ST_NEW_TES_STATE;
 286       break;
 287    case MESA_SHADER_GEOMETRY:
 288       cso_set_geometry_shader_handle(st->cso_context, NULL);
 289       st->dirty |= ST_NEW_GS_STATE;
 290       break;
 291    case MESA_SHADER_FRAGMENT:
 292       cso_set_fragment_shader_handle(st->cso_context, NULL);
 293       st->dirty |= ST_NEW_FS_STATE;
 294       break;
 295    case MESA_SHADER_COMPUTE:
 296       cso_set_compute_shader_handle(st->cso_context, NULL);
 297       st->dirty |= ST_NEW_CS_STATE;
 298       break;
 299    default:
 300       unreachable("invalid shader type");
 301    }
 302 }
 303
 304 /**
 305  * Free all basic program variants.
 306  */
 307 void
 308 st_release_variants(struct st_context *st, struct st_program *p)
 309 {
 310    struct st_variant *v;
 311
 312    /* If we are releasing shaders, re-bind them, because we don't
 313     * know which shaders are bound in the driver.
 314     */
 315    if (p->variants)
 316       st_unbind_program(st, p);
 317
 318    for (v = p->variants; v; ) {
 319       struct st_variant *next = v->next;
 320       delete_variant(st, v, p->Base.Target);
 321       v = next;
 322    }
 323
 324    p->variants = NULL;
 325
 326    if (p->state.tokens) {
 327       ureg_free_tokens(p->state.tokens);
 328       p->state.tokens = NULL;
 329    }
 330
 331    /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
 332     * it has resulted in the driver taking ownership of the NIR.  Those
 333     * callers should be NULLing out the nir field in any pipe_shader_state
 334     * that might have this called in order to indicate that.
 335     *
 336     * GLSL IR and ARB programs will have set gl_program->nir to the same
 337     * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
 338     */
 339 }
 340
 341 /**
 342  * Free all basic program variants and unref program.
 343  */
 344 void
 345 st_release_program(struct st_context *st, struct st_program **p)
 346 {
 347    if (!*p)
 348       return;
 349
 350    destroy_program_variants(st, &((*p)->Base));
 351    st_reference_prog(st, p, NULL);
 352 }
 353
 354 void
 355 st_finalize_nir_before_variants(struct nir_shader *nir)
 356 {
 357    NIR_PASS_V(nir, nir_opt_access);
 358
 359    NIR_PASS_V(nir, nir_split_var_copies);
 360    NIR_PASS_V(nir, nir_lower_var_copies);
 361    if (nir->options->lower_all_io_to_temps ||
 362        nir->options->lower_all_io_to_elements ||
 363        nir->info.stage == MESA_SHADER_VERTEX ||
 364        nir->info.stage == MESA_SHADER_GEOMETRY) {
 365       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 366    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 367       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 368    }
 369
 370    st_nir_assign_vs_in_locations(nir);
 371 }
 372
 373 /**
 374  * Translate ARB (asm) program to NIR
 375  */
 376 static nir_shader *
 377 st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
 378                          gl_shader_stage stage)
 379 {
 380    struct pipe_screen *screen = st->pipe->screen;
 381    const struct gl_shader_compiler_options *options =
 382       &st->ctx->Const.ShaderCompilerOptions[stage];
 383
 384    /* Translate to NIR */
 385    nir_shader *nir = prog_to_nir(prog, options->NirOptions);
 386    NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
 387    nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");
 388
 389    NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
 390    NIR_PASS_V(nir, nir_lower_system_values);
 391    NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
 392
 393    /* Optimise NIR */
 394    NIR_PASS_V(nir, nir_opt_constant_folding);
 395    st_nir_opts(nir);
 396    st_finalize_nir_before_variants(nir);
 397
 398    if (st->allow_st_finalize_nir_twice)
 399       st_finalize_nir(st, prog, NULL, nir, true);
 400
 401    nir_validate_shader(nir, "after st/glsl finalize_nir");
 402
 403    return nir;
 404 }
 405
 406 void
 407 st_prepare_vertex_program(struct st_program *stp)
 408 {
 409    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 410
 411    stvp->num_inputs = 0;
 412    memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
 413    memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));
 414
 415    /* Determine number of inputs, the mappings between VERT_ATTRIB_x
 416     * and TGSI generic input indexes, plus input attrib semantic info.
 417     */
 418    for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
 419       if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
 420          stvp->input_to_index[attr] = stvp->num_inputs;
 421          stvp->index_to_input[stvp->num_inputs] = attr;
 422          stvp->num_inputs++;
 423
 424          if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
 425             /* add placeholder for second part of a double attribute */
 426             stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
 427             stvp->num_inputs++;
 428          }
 429       }
 430    }
 431    /* pre-setup potentially unused edgeflag input */
 432    stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
 433    stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;
 434
 435    /* Compute mapping of vertex program outputs to slots. */
 436    unsigned num_outputs = 0;
 437    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 438       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
 439          stvp->result_to_output[attr] = num_outputs++;
 440    }
 441    /* pre-setup potentially unused edgeflag output */
 442    stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
 443 }
 444
 445 void
 446 st_translate_stream_output_info(struct gl_program *prog)
 447 {
 448    struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
 449    if (!info)
 450       return;
 451
 452    /* Determine the (default) output register mapping for each output. */
 453    unsigned num_outputs = 0;
 454    ubyte output_mapping[VARYING_SLOT_TESS_MAX];
 455    memset(output_mapping, 0, sizeof(output_mapping));
 456
 457    for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 458       if (prog->info.outputs_written & BITFIELD64_BIT(attr))
 459          output_mapping[attr] = num_outputs++;
 460    }
 461
 462    /* Translate stream output info. */
 463    struct pipe_stream_output_info *so_info =
 464       &((struct st_program*)prog)->state.stream_output;
 465
 466    for (unsigned i = 0; i < info->NumOutputs; i++) {
 467       so_info->output[i].register_index =
 468          output_mapping[info->Outputs[i].OutputRegister];
 469       so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
 470       so_info->output[i].num_components = info->Outputs[i].NumComponents;
 471       so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
 472       so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
 473       so_info->output[i].stream = info->Outputs[i].StreamId;
 474    }
 475
 476    for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
 477       so_info->stride[i] = info->Buffers[i].Stride;
 478    }
 479    so_info->num_outputs = info->NumOutputs;
 480 }
 481
 482 /**
 483  * Translate a vertex program.
 484  */
 485 bool
 486 st_translate_vertex_program(struct st_context *st,
 487                             struct st_program *stp)
 488 {
 489    struct ureg_program *ureg;
 490    enum pipe_error error;
 491    unsigned num_outputs = 0;
 492    unsigned attr;
 493    ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
 494    ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};
 495
 496    if (stp->Base.arb.IsPositionInvariant)
 497       _mesa_insert_mvp_code(st->ctx, &stp->Base);
 498
 499    /* ARB_vp: */
 500    if (!stp->glsl_to_tgsi) {
 501       _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);
 502
 503       /* This determines which states will be updated when the assembly
 504        * shader is bound.
 505        */
 506       stp->affected_states = ST_NEW_VS_STATE |
 507                               ST_NEW_RASTERIZER |
 508                               ST_NEW_VERTEX_ARRAYS;
 509
 510       if (stp->Base.Parameters->NumParameters)
 511          stp->affected_states |= ST_NEW_VS_CONSTANTS;
 512
 513       /* Translate to NIR if preferred. */
 514       if (st->pipe->screen->get_shader_param(st->pipe->screen,
 515                                              PIPE_SHADER_VERTEX,
 516                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 517          assert(!stp->glsl_to_tgsi);
 518
 519          if (stp->Base.nir)
 520             ralloc_free(stp->Base.nir);
 521
 522          if (stp->serialized_nir) {
 523             free(stp->serialized_nir);
 524             stp->serialized_nir = NULL;
 525          }
 526
 527          stp->state.type = PIPE_SHADER_IR_NIR;
 528          stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
 529                                                   MESA_SHADER_VERTEX);
 530
 531          /* We must update stp->Base.info after translation and before
 532           * st_prepare_vertex_program is called, because inputs_read
 533           * may become outdated after NIR optimization passes.
 534           *
 535           * For ffvp/ARB_vp inputs_read is populated based
 536           * on declared attributes without taking their usage into
 537           * consideration. When creating shader variants we expect
 538           * that their inputs_read would match the base ones for
 539           * input mapping to work properly.
 540           */
 541          nir_shader_gather_info(stp->Base.nir,
 542                                 nir_shader_get_entrypoint(stp->Base.nir));
 543          st_nir_assign_vs_in_locations(stp->Base.nir);
 544          stp->Base.info = stp->Base.nir->info;
 545
 546          /* For st_draw_feedback, we need to generate TGSI too if draw doesn't
 547           * use LLVM.
 548           */
 549          /* TODO: Draw can't handle lowered IO. */
 550          if (draw_has_llvm() && !stp->Base.info.io_lowered) {
 551             st_prepare_vertex_program(stp);
 552             return true;
 553          }
 554       }
 555    }
 556
 557    st_prepare_vertex_program(stp);
 558
 559    /* Get semantic names and indices. */
 560    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 561       if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
 562          unsigned slot = num_outputs++;
 563          unsigned semantic_name, semantic_index;
 564          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
 565                                       &semantic_name, &semantic_index);
 566          output_semantic_name[slot] = semantic_name;
 567          output_semantic_index[slot] = semantic_index;
 568       }
 569    }
 570    /* pre-setup potentially unused edgeflag output */
 571    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
 572    output_semantic_index[num_outputs] = 0;
 573
 574    ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
 575    if (ureg == NULL)
 576       return false;
 577
 578    if (stp->Base.info.clip_distance_array_size)
 579       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
 580                     stp->Base.info.clip_distance_array_size);
 581    if (stp->Base.info.cull_distance_array_size)
 582       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
 583                     stp->Base.info.cull_distance_array_size);
 584
 585    if (ST_DEBUG & DEBUG_MESA) {
 586       _mesa_print_program(&stp->Base);
 587       _mesa_print_program_parameters(st->ctx, &stp->Base);
 588       debug_printf("\n");
 589    }
 590
 591    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 592
 593    if (stp->glsl_to_tgsi) {
 594       error = st_translate_program(st->ctx,
 595                                    PIPE_SHADER_VERTEX,
 596                                    ureg,
 597                                    stp->glsl_to_tgsi,
 598                                    &stp->Base,
 599                                    /* inputs */
 600                                    stvp->num_inputs,
 601                                    stvp->input_to_index,
 602                                    NULL, /* inputSlotToAttr */
 603                                    NULL, /* input semantic name */
 604                                    NULL, /* input semantic index */
 605                                    NULL, /* interp mode */
 606                                    /* outputs */
 607                                    num_outputs,
 608                                    stvp->result_to_output,
 609                                    output_semantic_name,
 610                                    output_semantic_index);
 611
 612       st_translate_stream_output_info(&stp->Base);
 613
 614       free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
 615    } else
 616       error = st_translate_mesa_program(st->ctx,
 617                                         PIPE_SHADER_VERTEX,
 618                                         ureg,
 619                                         &stp->Base,
 620                                         /* inputs */
 621                                         stvp->num_inputs,
 622                                         stvp->input_to_index,
 623                                         NULL, /* input semantic name */
 624                                         NULL, /* input semantic index */
 625                                         NULL,
 626                                         /* outputs */
 627                                         num_outputs,
 628                                         stvp->result_to_output,
 629                                         output_semantic_name,
 630                                         output_semantic_index);
 631
 632    if (error) {
 633       debug_printf("%s: failed to translate Mesa program:\n", __func__);
 634       _mesa_print_program(&stp->Base);
 635       debug_assert(0);
 636       return false;
 637    }
 638
 639    stp->state.tokens = ureg_get_tokens(ureg, NULL);
 640    ureg_destroy(ureg);
 641
 642    if (stp->glsl_to_tgsi) {
 643       stp->glsl_to_tgsi = NULL;
 644       st_store_ir_in_disk_cache(st, &stp->Base, false);
 645    }
 646
 647    return stp->state.tokens != NULL;
 648 }
 649
 650 static struct nir_shader *
 651 get_nir_shader(struct st_context *st, struct st_program *stp)
 652 {
 653    if (stp->Base.nir) {
 654       nir_shader *nir = stp->Base.nir;
 655
 656       /* The first shader variant takes ownership of NIR, so that there is
 657        * no cloning. Additional shader variants are always generated from
 658        * serialized NIR to save memory.
 659        */
 660       stp->Base.nir = NULL;
 661       assert(stp->serialized_nir && stp->serialized_nir_size);
 662       return nir;
 663    }
 664
 665    struct blob_reader blob_reader;
 666    const struct nir_shader_compiler_options *options =
 667       st->ctx->Const.ShaderCompilerOptions[stp->Base.info.stage].NirOptions;
 668
 669    blob_reader_init(&blob_reader, stp->serialized_nir, stp->serialized_nir_size);
 670    return nir_deserialize(NULL, options, &blob_reader);
 671 }
 672
 673 static void
 674 lower_ucp(struct st_context *st,
 675           struct nir_shader *nir,
 676           unsigned ucp_enables,
 677           struct gl_program_parameter_list *params)
 678 {
 679    if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)
 680       NIR_PASS_V(nir, nir_lower_clip_disable, ucp_enables);
 681    else {
 682       struct pipe_screen *screen = st->pipe->screen;
 683       bool can_compact = screen->get_param(screen,
 684                                            PIPE_CAP_NIR_COMPACT_ARRAYS);
 685       bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
 686
 687       gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
 688       for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
 689          if (use_eye) {
 690             clipplane_state[i][0] = STATE_CLIPPLANE;
 691             clipplane_state[i][1] = i;
 692          } else {
 693             clipplane_state[i][0] = STATE_INTERNAL;
 694             clipplane_state[i][1] = STATE_CLIP_INTERNAL;
 695             clipplane_state[i][2] = i;
 696          }
 697          _mesa_add_state_reference(params, clipplane_state[i]);
 698       }
 699
 700       if (nir->info.stage == MESA_SHADER_VERTEX) {
 701          NIR_PASS_V(nir, nir_lower_clip_vs, ucp_enables,
 702                     true, can_compact, clipplane_state);
 703       } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
 704          NIR_PASS_V(nir, nir_lower_clip_gs, ucp_enables,
 705                     can_compact, clipplane_state);
 706       }
 707
 708       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
 709                  nir_shader_get_entrypoint(nir), true, false);
 710       NIR_PASS_V(nir, nir_lower_global_vars_to_local);
 711    }
 712 }
 713
/* State reference for the depth range.  Only the first token is given, so
 * the remaining STATE_LENGTH - 1 entries are zero.  Used by
 * st_create_vp_variant() when depth clamp lowering is requested.
 */
static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };
 716
 717 static struct st_common_variant *
 718 st_create_vp_variant(struct st_context *st,
 719                      struct st_program *stvp,
 720                      const struct st_common_variant_key *key)
 721 {
 722    struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
 723    struct pipe_context *pipe = st->pipe;
 724    struct pipe_shader_state state = {0};
 725
 726    static const gl_state_index16 point_size_state[STATE_LENGTH] =
 727       { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
 728    struct gl_program_parameter_list *params = stvp->Base.Parameters;
 729
 730    vpv->key = *key;
 731
 732    state.stream_output = stvp->state.stream_output;
 733
 734    if (stvp->state.type == PIPE_SHADER_IR_NIR &&
 735        (!key->is_draw_shader ||
 736         /* TODO: Draw can't handle lowered IO. */
 737         (draw_has_llvm() && !stvp->Base.info.io_lowered))) {
 738       bool finalize = false;
 739
 740       state.type = PIPE_SHADER_IR_NIR;
 741       state.ir.nir = get_nir_shader(st, stvp);
 742       if (key->clamp_color) {
 743          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
 744          finalize = true;
 745       }
 746       if (key->passthrough_edgeflags) {
 747          NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
 748          finalize = true;
 749       }
 750
 751       if (key->lower_point_size) {
 752          _mesa_add_state_reference(params, point_size_state);
 753          NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
 754                     point_size_state);
 755          finalize = true;
 756       }
 757
 758       if (key->lower_ucp) {
 759          lower_ucp(st, state.ir.nir, key->lower_ucp, params);
 760          finalize = true;
 761       }
 762
 763       if (finalize || !st->allow_st_finalize_nir_twice) {
 764          st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
 765                          true);
 766
 767          /* Some of the lowering above may have introduced new varyings */
 768          nir_shader_gather_info(state.ir.nir,
 769                                 nir_shader_get_entrypoint(state.ir.nir));
 770       }
 771
 772       if (ST_DEBUG & DEBUG_PRINT_IR)
 773          nir_print_shader(state.ir.nir, stderr);
 774
 775       if (key->is_draw_shader)
 776          vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 777       else
 778          vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 779
 780       return vpv;
 781    }
 782
 783    state.type = PIPE_SHADER_IR_TGSI;
 784    state.tokens = tgsi_dup_tokens(stvp->state.tokens);
 785
 786    /* Emulate features. */
 787    if (key->clamp_color || key->passthrough_edgeflags) {
 788       const struct tgsi_token *tokens;
 789       unsigned flags =
 790          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
 791          (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 792
 793       tokens = tgsi_emulate(state.tokens, flags);
 794
 795       if (tokens) {
 796          tgsi_free_tokens(state.tokens);
 797          state.tokens = tokens;
 798       } else {
 799          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
 800       }
 801    }
 802
 803    if (key->lower_depth_clamp) {
 804       unsigned depth_range_const =
 805             _mesa_add_state_reference(params, depth_range_state);
 806
 807       const struct tgsi_token *tokens;
 808       tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
 809                                          key->clip_negative_one_to_one);
 810       if (tokens != state.tokens)
 811          tgsi_free_tokens(state.tokens);
 812       state.tokens = tokens;
 813    }
 814
 815    if (ST_DEBUG & DEBUG_PRINT_IR)
 816       tgsi_dump(state.tokens, 0);
 817
 818    if (key->is_draw_shader)
 819       vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
 820    else
 821       vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);
 822
 823    if (state.tokens) {
 824       tgsi_free_tokens(state.tokens);
 825    }
 826
 827    return vpv;
 828 }
 829
 830
 831 /**
 832  * Find/create a vertex program variant.
 833  */
 834 struct st_common_variant *
 835 st_get_vp_variant(struct st_context *st,
 836                   struct st_program *stp,
 837                   const struct st_common_variant_key *key)
 838 {
 839    struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
 840    struct st_common_variant *vpv;
 841
 842    /* Search for existing variant */
 843    for (vpv = st_common_variant(stp->variants); vpv;
 844         vpv = st_common_variant(vpv->base.next)) {
 845       if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
 846          break;
 847       }
 848    }
 849
 850    if (!vpv) {
 851       /* create now */
 852       vpv = st_create_vp_variant(st, stp, key);
 853       if (vpv) {
 854          vpv->base.st = key->st;
 855
 856          unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
 857          for (unsigned index = 0; index < num_inputs; ++index) {
 858             unsigned attr = stvp->index_to_input[index];
 859             if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
 860                continue;
 861             vpv->vert_attrib_mask |= 1u << attr;
 862          }
 863
 864          /* insert into list */
 865          vpv->base.next = stp->variants;
 866          stp->variants = &vpv->base;
 867       }
 868    }
 869
 870    return vpv;
 871 }
 872
 873
 874 /**
 875  * Translate a Mesa fragment shader into a TGSI shader.
 876  */
 877 bool
 878 st_translate_fragment_program(struct st_context *st,
 879                               struct st_program *stfp)
 880 {
 881    /* Non-GLSL programs: */
 882    if (!stfp->glsl_to_tgsi) {
 883       _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
 884       if (st->ctx->Const.GLSLFragCoordIsSysVal)
 885          _mesa_program_fragment_position_to_sysval(&stfp->Base);
 886
 887       /* This determines which states will be updated when the assembly
 888        * shader is bound.
 889        *
 890        * fragment.position and glDrawPixels always use constants.
 891        */
 892       stfp->affected_states = ST_NEW_FS_STATE |
 893                               ST_NEW_SAMPLE_SHADING |
 894                               ST_NEW_FS_CONSTANTS;
 895
 896       if (stfp->ati_fs) {
 897          /* Just set them for ATI_fs unconditionally. */
 898          stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 899                                   ST_NEW_FS_SAMPLERS;
 900       } else {
 901          /* ARB_fp */
 902          if (stfp->Base.SamplersUsed)
 903             stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
 904                                      ST_NEW_FS_SAMPLERS;
 905       }
 906
 907       /* Translate to NIR. */
 908       if (!stfp->ati_fs &&
 909           st->pipe->screen->get_shader_param(st->pipe->screen,
 910                                              PIPE_SHADER_FRAGMENT,
 911                                              PIPE_SHADER_CAP_PREFERRED_IR)) {
 912          nir_shader *nir =
 913             st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);
 914
 915          if (stfp->Base.nir)
 916             ralloc_free(stfp->Base.nir);
 917          if (stfp->serialized_nir) {
 918             free(stfp->serialized_nir);
 919             stfp->serialized_nir = NULL;
 920          }
 921          stfp->state.type = PIPE_SHADER_IR_NIR;
 922          stfp->Base.nir = nir;
 923          return true;
 924       }
 925    }
 926
 927    ubyte outputMapping[2 * FRAG_RESULT_MAX];
 928    ubyte inputMapping[VARYING_SLOT_MAX];
 929    ubyte inputSlotToAttr[VARYING_SLOT_MAX];
 930    ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
 931    GLuint attr;
 932    GLbitfield64 inputsRead;
 933    struct ureg_program *ureg;
 934
 935    GLboolean write_all = GL_FALSE;
 936
 937    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
 938    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
 939    uint fs_num_inputs = 0;
 940
 941    ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
 942    ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
 943    uint fs_num_outputs = 0;
 944
 945    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 946
 947    /*
 948     * Convert Mesa program inputs to TGSI input register semantics.
 949     */
 950    inputsRead = stfp->Base.info.inputs_read;
 951    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
 952       if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
 953          const GLuint slot = fs_num_inputs++;
 954
 955          inputMapping[attr] = slot;
 956          inputSlotToAttr[slot] = attr;
 957
 958          switch (attr) {
 959          case VARYING_SLOT_POS:
 960             input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
 961             input_semantic_index[slot] = 0;
 962             interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
 963             break;
 964          case VARYING_SLOT_COL0:
 965             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 966             input_semantic_index[slot] = 0;
 967             interpMode[slot] = stfp->glsl_to_tgsi ?
 968                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 969             break;
 970          case VARYING_SLOT_COL1:
 971             input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
 972             input_semantic_index[slot] = 1;
 973             interpMode[slot] = stfp->glsl_to_tgsi ?
 974                TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
 975             break;
 976          case VARYING_SLOT_FOGC:
 977             input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
 978             input_semantic_index[slot] = 0;
 979             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
 980             break;
 981          case VARYING_SLOT_FACE:
 982             input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
 983             input_semantic_index[slot] = 0;
 984             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 985             break;
 986          case VARYING_SLOT_PRIMITIVE_ID:
 987             input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
 988             input_semantic_index[slot] = 0;
 989             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 990             break;
 991          case VARYING_SLOT_LAYER:
 992             input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
 993             input_semantic_index[slot] = 0;
 994             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
 995             break;
 996          case VARYING_SLOT_VIEWPORT:
 997             input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
 998             input_semantic_index[slot] = 0;
 999             interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
1000             break;
1001          case VARYING_SLOT_CLIP_DIST0:
1002             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
1003             input_semantic_index[slot] = 0;
1004             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
1005             break;
1006          case VARYING_SLOT_CLIP_DIST1:
1007             input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
1008             input_semantic_index[slot] = 1;
1009             interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
1010             break;
1011          case VARYING_SLOT_CULL_DIST0:
1012          case VARYING_SLOT_CULL_DIST1:
1013             /* these should have been lowered by GLSL */
1014             assert(0);
1015             break;
1016             /* In most cases, there is nothing special about these
1017              * inputs, so adopt a convention to use the generic
1018              * semantic name and the mesa VARYING_SLOT_ number as the
1019              * index.
1020              *
1021              * All that is required is that the vertex shader labels
1022              * its own outputs similarly, and that the vertex shader
1023              * generates at least every output required by the
1024              * fragment shader plus fixed-function hardware (such as
1025              * BFC).
1026              *
1027              * However, some drivers may need us to identify the PNTC and TEXi
1028              * varyings if, for example, their capability to replace them with
1029              * sprite coordinates is limited.
1030              */
1031          case VARYING_SLOT_PNTC:
1032             if (st->needs_texcoord_semantic) {
1033                input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
1034                input_semantic_index[slot] = 0;
1035                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1036                break;
1037             }
1038             /* fall through */
1039          case VARYING_SLOT_TEX0:
1040          case VARYING_SLOT_TEX1:
1041          case VARYING_SLOT_TEX2:
1042          case VARYING_SLOT_TEX3:
1043          case VARYING_SLOT_TEX4:
1044          case VARYING_SLOT_TEX5:
1045          case VARYING_SLOT_TEX6:
1046          case VARYING_SLOT_TEX7:
1047             if (st->needs_texcoord_semantic) {
1048                input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
1049                input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
1050                interpMode[slot] = stfp->glsl_to_tgsi ?
1051                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1052                break;
1053             }
1054             /* fall through */
1055          case VARYING_SLOT_VAR0:
1056          default:
1057             /* Semantic indices should be zero-based because drivers may choose
1058              * to assign a fixed slot determined by that index.
1059              * This is useful because ARB_separate_shader_objects uses location
1060              * qualifiers for linkage, and if the semantic index corresponds to
1061              * these locations, linkage passes in the driver become unecessary.
1062              *
1063              * If needs_texcoord_semantic is true, no semantic indices will be
1064              * consumed for the TEXi varyings, and we can base the locations of
1065              * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
1066              */
1067             assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
1068                    (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
1069             input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
1070             input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
1071             if (attr == VARYING_SLOT_PNTC)
1072                interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
1073             else {
1074                interpMode[slot] = stfp->glsl_to_tgsi ?
1075                   TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
1076             }
1077             break;
1078          }
1079       }
1080       else {
1081          inputMapping[attr] = -1;
1082       }
1083    }
1084
1085    /*
1086     * Semantics and mapping for outputs
1087     */
1088    GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;
1089
1090    /* if z is written, emit that first */
1091    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
1092       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
1093       fs_output_semantic_index[fs_num_outputs] = 0;
1094       outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
1095       fs_num_outputs++;
1096       outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
1097    }
1098
1099    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
1100       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
1101       fs_output_semantic_index[fs_num_outputs] = 0;
1102       outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
1103       fs_num_outputs++;
1104       outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
1105    }
1106
1107    if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
1108       fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
1109       fs_output_semantic_index[fs_num_outputs] = 0;
1110       outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
1111       fs_num_outputs++;
1112       outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
1113    }
1114
1115    /* handle remaining outputs (color) */
1116    for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
1117       const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
1118          stfp->Base.SecondaryOutputsWritten;
1119       const unsigned loc = attr % FRAG_RESULT_MAX;
1120
1121       if (written & BITFIELD64_BIT(loc)) {
1122          switch (loc) {
1123          case FRAG_RESULT_DEPTH:
1124          case FRAG_RESULT_STENCIL:
1125          case FRAG_RESULT_SAMPLE_MASK:
1126             /* handled above */
1127             assert(0);
1128             break;
1129          case FRAG_RESULT_COLOR:
1130             write_all = GL_TRUE; /* fallthrough */
1131          default: {
1132             int index;
1133             assert(loc == FRAG_RESULT_COLOR ||
1134                    (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
1135
1136             index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);
1137
1138             if (attr >= FRAG_RESULT_MAX) {
1139                /* Secondary color for dual source blending. */
1140                assert(index == 0);
1141                index++;
1142             }
1143
1144             fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
1145             fs_output_semantic_index[fs_num_outputs] = index;
1146             outputMapping[attr] = fs_num_outputs;
1147             break;
1148          }
1149          }
1150
1151          fs_num_outputs++;
1152       }
1153    }
1154
1155    ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
1156    if (ureg == NULL)
1157       return false;
1158
1159    if (ST_DEBUG & DEBUG_MESA) {
1160       _mesa_print_program(&stfp->Base);
1161       _mesa_print_program_parameters(st->ctx, &stfp->Base);
1162       debug_printf("\n");
1163    }
1164    if (write_all == GL_TRUE)
1165       ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
1166
1167    if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
1168       switch (stfp->Base.info.fs.depth_layout) {
1169       case FRAG_DEPTH_LAYOUT_ANY:
1170          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1171                        TGSI_FS_DEPTH_LAYOUT_ANY);
1172          break;
1173       case FRAG_DEPTH_LAYOUT_GREATER:
1174          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1175                        TGSI_FS_DEPTH_LAYOUT_GREATER);
1176          break;
1177       case FRAG_DEPTH_LAYOUT_LESS:
1178          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1179                        TGSI_FS_DEPTH_LAYOUT_LESS);
1180          break;
1181       case FRAG_DEPTH_LAYOUT_UNCHANGED:
1182          ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
1183                        TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
1184          break;
1185       default:
1186          assert(0);
1187       }
1188    }
1189
1190    if (stfp->glsl_to_tgsi) {
1191       st_translate_program(st->ctx,
1192                            PIPE_SHADER_FRAGMENT,
1193                            ureg,
1194                            stfp->glsl_to_tgsi,
1195                            &stfp->Base,
1196                            /* inputs */
1197                            fs_num_inputs,
1198                            inputMapping,
1199                            inputSlotToAttr,
1200                            input_semantic_name,
1201                            input_semantic_index,
1202                            interpMode,
1203                            /* outputs */
1204                            fs_num_outputs,
1205                            outputMapping,
1206                            fs_output_semantic_name,
1207                            fs_output_semantic_index);
1208
1209       free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
1210    } else if (stfp->ati_fs)
1211       st_translate_atifs_program(ureg,
1212                                  stfp->ati_fs,
1213                                  &stfp->Base,
1214                                  /* inputs */
1215                                  fs_num_inputs,
1216                                  inputMapping,
1217                                  input_semantic_name,
1218                                  input_semantic_index,
1219                                  interpMode,
1220                                  /* outputs */
1221                                  fs_num_outputs,
1222                                  outputMapping,
1223                                  fs_output_semantic_name,
1224                                  fs_output_semantic_index);
1225    else
1226       st_translate_mesa_program(st->ctx,
1227                                 PIPE_SHADER_FRAGMENT,
1228                                 ureg,
1229                                 &stfp->Base,
1230                                 /* inputs */
1231                                 fs_num_inputs,
1232                                 inputMapping,
1233                                 input_semantic_name,
1234                                 input_semantic_index,
1235                                 interpMode,
1236                                 /* outputs */
1237                                 fs_num_outputs,
1238                                 outputMapping,
1239                                 fs_output_semantic_name,
1240                                 fs_output_semantic_index);
1241
1242    stfp->state.tokens = ureg_get_tokens(ureg, NULL);
1243    ureg_destroy(ureg);
1244
1245    if (stfp->glsl_to_tgsi) {
1246       stfp->glsl_to_tgsi = NULL;
1247       st_store_ir_in_disk_cache(st, &stfp->Base, false);
1248    }
1249
1250    return stfp->state.tokens != NULL;
1251 }
1252
1253 static struct st_fp_variant *
1254 st_create_fp_variant(struct st_context *st,
1255                      struct st_program *stfp,
1256                      const struct st_fp_variant_key *key)
1257 {
1258    struct pipe_context *pipe = st->pipe;
1259    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
1260    struct pipe_shader_state state = {0};
1261    struct gl_program_parameter_list *params = stfp->Base.Parameters;
1262    static const gl_state_index16 texcoord_state[STATE_LENGTH] =
1263       { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
1264    static const gl_state_index16 scale_state[STATE_LENGTH] =
1265       { STATE_INTERNAL, STATE_PT_SCALE };
1266    static const gl_state_index16 bias_state[STATE_LENGTH] =
1267       { STATE_INTERNAL, STATE_PT_BIAS };
1268    static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
1269       { STATE_INTERNAL, STATE_ALPHA_REF };
1270
1271    if (!variant)
1272       return NULL;
1273
1274    if (stfp->state.type == PIPE_SHADER_IR_NIR) {
1275       bool finalize = false;
1276
1277       state.type = PIPE_SHADER_IR_NIR;
1278       state.ir.nir = get_nir_shader(st, stfp);
1279
1280       if (key->clamp_color) {
1281          NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1282          finalize = true;
1283       }
1284
1285       if (key->lower_flatshade) {
1286          NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
1287          finalize = true;
1288       }
1289
1290       if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
1291          _mesa_add_state_reference(params, alpha_ref_state);
1292          NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
1293                     false, alpha_ref_state);
1294          finalize = true;
1295       }
1296
1297       if (key->lower_two_sided_color) {
1298          bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal;
1299          NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color, face_sysval);
1300          finalize = true;
1301       }
1302
1303       if (key->persample_shading) {
1304           nir_shader *shader = state.ir.nir;
1305           nir_foreach_shader_in_variable(var, shader)
1306              var->data.sample = true;
1307           finalize = true;
1308       }
1309
1310       assert(!(key->bitmap && key->drawpixels));
1311
1312       /* glBitmap */
1313       if (key->bitmap) {
1314          nir_lower_bitmap_options options = {0};
1315
1316          variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1317          options.sampler = variant->bitmap_sampler;
1318          options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;
1319
1320          NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
1321          finalize = true;
1322       }
1323
1324       /* glDrawPixels (color only) */
1325       if (key->drawpixels) {
1326          nir_lower_drawpixels_options options = {{0}};
1327          unsigned samplers_used = stfp->Base.SamplersUsed;
1328
1329          /* Find the first unused slot. */
1330          variant->drawpix_sampler = ffs(~samplers_used) - 1;
1331          options.drawpix_sampler = variant->drawpix_sampler;
1332          samplers_used |= (1 << variant->drawpix_sampler);
1333
1334          options.pixel_maps = key->pixelMaps;
1335          if (key->pixelMaps) {
1336             variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1337             options.pixelmap_sampler = variant->pixelmap_sampler;
1338          }
1339
1340          options.scale_and_bias = key->scaleAndBias;
1341          if (key->scaleAndBias) {
1342             _mesa_add_state_reference(params, scale_state);
1343             memcpy(options.scale_state_tokens, scale_state,
1344                    sizeof(options.scale_state_tokens));
1345             _mesa_add_state_reference(params, bias_state);
1346             memcpy(options.bias_state_tokens, bias_state,
1347                    sizeof(options.bias_state_tokens));
1348          }
1349
1350          _mesa_add_state_reference(params, texcoord_state);
1351          memcpy(options.texcoord_state_tokens, texcoord_state,
1352                 sizeof(options.texcoord_state_tokens));
1353
1354          NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
1355          finalize = true;
1356       }
1357
1358       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1359                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1360                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1361
1362          st_nir_lower_samplers(pipe->screen, state.ir.nir,
1363                                stfp->shader_program, &stfp->Base);
1364
1365          nir_lower_tex_options options = {0};
1366          options.lower_y_uv_external = key->external.lower_nv12;
1367          options.lower_y_u_v_external = key->external.lower_iyuv;
1368          options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
1369          options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
1370          options.lower_ayuv_external = key->external.lower_ayuv;
1371          options.lower_xyuv_external = key->external.lower_xyuv;
1372          NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
1373          finalize = true;
1374       }
1375
1376       if (finalize || !st->allow_st_finalize_nir_twice) {
1377          st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
1378                          false);
1379       }
1380
1381       /* This pass needs to happen *after* nir_lower_sampler */
1382       if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1383                    key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
1384                    key->external.lower_ayuv || key->external.lower_xyuv)) {
1385          NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
1386                     ~stfp->Base.SamplersUsed,
1387                     key->external.lower_nv12 || key->external.lower_xy_uxvx ||
1388                        key->external.lower_yx_xuxv,
1389                     key->external.lower_iyuv);
1390          finalize = true;
1391       }
1392
1393       if (finalize || !st->allow_st_finalize_nir_twice) {
1394          /* Some of the lowering above may have introduced new varyings */
1395          nir_shader_gather_info(state.ir.nir,
1396                                 nir_shader_get_entrypoint(state.ir.nir));
1397
1398          struct pipe_screen *screen = pipe->screen;
1399          if (screen->finalize_nir)
1400             screen->finalize_nir(screen, state.ir.nir, false);
1401       }
1402
1403       if (ST_DEBUG & DEBUG_PRINT_IR)
1404          nir_print_shader(state.ir.nir, stderr);
1405
1406       variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1407       variant->key = *key;
1408
1409       return variant;
1410    }
1411
1412    state.tokens = stfp->state.tokens;
1413
1414    assert(!(key->bitmap && key->drawpixels));
1415
1416    /* Fix texture targets and add fog for ATI_fs */
1417    if (stfp->ati_fs) {
1418       const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);
1419
1420       if (tokens)
1421          state.tokens = tokens;
1422       else
1423          fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
1424    }
1425
1426    /* Emulate features. */
1427    if (key->clamp_color || key->persample_shading) {
1428       const struct tgsi_token *tokens;
1429       unsigned flags =
1430          (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
1431          (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
1432
1433       tokens = tgsi_emulate(state.tokens, flags);
1434
1435       if (tokens) {
1436          if (state.tokens != stfp->state.tokens)
1437             tgsi_free_tokens(state.tokens);
1438          state.tokens = tokens;
1439       } else
1440          fprintf(stderr, "mesa: cannot emulate deprecated features\n");
1441    }
1442
1443    /* glBitmap */
1444    if (key->bitmap) {
1445       const struct tgsi_token *tokens;
1446
1447       variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1448
1449       tokens = st_get_bitmap_shader(state.tokens,
1450                                     st->internal_target,
1451                                     variant->bitmap_sampler,
1452                                     st->needs_texcoord_semantic,
1453                                     st->bitmap.tex_format ==
1454                                     PIPE_FORMAT_R8_UNORM);
1455
1456       if (tokens) {
1457          if (state.tokens != stfp->state.tokens)
1458             tgsi_free_tokens(state.tokens);
1459          state.tokens = tokens;
1460       } else
1461          fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
1462    }
1463
1464    /* glDrawPixels (color only) */
1465    if (key->drawpixels) {
1466       const struct tgsi_token *tokens;
1467       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
1468
1469       /* Find the first unused slot. */
1470       variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
1471
1472       if (key->pixelMaps) {
1473          unsigned samplers_used = stfp->Base.SamplersUsed |
1474                                   (1 << variant->drawpix_sampler);
1475
1476          variant->pixelmap_sampler = ffs(~samplers_used) - 1;
1477       }
1478
1479       if (key->scaleAndBias) {
1480          scale_const = _mesa_add_state_reference(params, scale_state);
1481          bias_const = _mesa_add_state_reference(params, bias_state);
1482       }
1483
1484       texcoord_const = _mesa_add_state_reference(params, texcoord_state);
1485
1486       tokens = st_get_drawpix_shader(state.tokens,
1487                                      st->needs_texcoord_semantic,
1488                                      key->scaleAndBias, scale_const,
1489                                      bias_const, key->pixelMaps,
1490                                      variant->drawpix_sampler,
1491                                      variant->pixelmap_sampler,
1492                                      texcoord_const, st->internal_target);
1493
1494       if (tokens) {
1495          if (state.tokens != stfp->state.tokens)
1496             tgsi_free_tokens(state.tokens);
1497          state.tokens = tokens;
1498       } else
1499          fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
1500    }
1501
1502    if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
1503                 key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
1504       const struct tgsi_token *tokens;
1505
1506       /* samplers inserted would conflict, but this should be unpossible: */
1507       assert(!(key->bitmap || key->drawpixels));
1508
1509       tokens = st_tgsi_lower_yuv(state.tokens,
1510                                  ~stfp->Base.SamplersUsed,
1511                                  key->external.lower_nv12 ||
1512                                     key->external.lower_xy_uxvx ||
1513                                     key->external.lower_yx_xuxv,
1514                                  key->external.lower_iyuv);
1515       if (tokens) {
1516          if (state.tokens != stfp->state.tokens)
1517             tgsi_free_tokens(state.tokens);
1518          state.tokens = tokens;
1519       } else {
1520          fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
1521       }
1522    }
1523
1524    if (key->lower_depth_clamp) {
1525       unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);
1526
1527       const struct tgsi_token *tokens;
1528       tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
1529       if (state.tokens != stfp->state.tokens)
1530          tgsi_free_tokens(state.tokens);
1531       state.tokens = tokens;
1532    }
1533
1534    if (ST_DEBUG & DEBUG_PRINT_IR)
1535       tgsi_dump(state.tokens, 0);
1536
1537    /* fill in variant */
1538    variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
1539    variant->key = *key;
1540
1541    if (state.tokens != stfp->state.tokens)
1542       tgsi_free_tokens(state.tokens);
1543    return variant;
1544 }
1545
1546 /**
1547  * Translate fragment program if needed.
1548  */
1549 struct st_fp_variant *
1550 st_get_fp_variant(struct st_context *st,
1551                   struct st_program *stfp,
1552                   const struct st_fp_variant_key *key)
1553 {
1554    struct st_fp_variant *fpv;
1555
1556    /* Search for existing variant */
1557    for (fpv = st_fp_variant(stfp->variants); fpv;
1558         fpv = st_fp_variant(fpv->base.next)) {
1559       if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
1560          break;
1561       }
1562    }
1563
1564    if (!fpv) {
1565       /* create new */
1566       fpv = st_create_fp_variant(st, stfp, key);
1567       if (fpv) {
1568          fpv->base.st = key->st;
1569
1570          if (key->bitmap || key->drawpixels) {
1571             /* Regular variants should always come before the
1572              * bitmap & drawpixels variants, (unless there
1573              * are no regular variants) so that
1574              * st_update_fp can take a fast path when
1575              * shader_has_one_variant is set.
1576              */
1577             if (!stfp->variants) {
1578                stfp->variants = &fpv->base;
1579             } else {
1580                /* insert into list after the first one */
1581                fpv->base.next = stfp->variants->next;
1582                stfp->variants->next = &fpv->base;
1583             }
1584          } else {
1585             /* insert into list */
1586             fpv->base.next = stfp->variants;
1587             stfp->variants = &fpv->base;
1588          }
1589       }
1590    }
1591
1592    return fpv;
1593 }
1594
1595 /**
1596  * Translate a program. This is common code for geometry and tessellation
1597  * shaders.
1598  */
1599 bool
1600 st_translate_common_program(struct st_context *st,
1601                             struct st_program *stp)
1602 {
1603    struct gl_program *prog = &stp->Base;
1604    enum pipe_shader_type stage =
1605       pipe_shader_type_from_mesa(stp->Base.info.stage);
1606    struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);
1607
1608    if (ureg == NULL)
1609       return false;
1610
1611    switch (stage) {
1612    case PIPE_SHADER_TESS_CTRL:
1613       ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
1614                     stp->Base.info.tess.tcs_vertices_out);
1615       break;
1616
1617    case PIPE_SHADER_TESS_EVAL:
1618       if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
1619          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
1620       else
1621          ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
1622                        stp->Base.info.tess.primitive_mode);
1623
1624       STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1625       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1626                     PIPE_TESS_SPACING_FRACTIONAL_ODD);
1627       STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1628                     PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1629
1630       ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1631                     (stp->Base.info.tess.spacing + 1) % 3);
1632
1633       ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1634                     !stp->Base.info.tess.ccw);
1635       ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1636                     stp->Base.info.tess.point_mode);
1637       break;
1638
1639    case PIPE_SHADER_GEOMETRY:
1640       ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1641                     stp->Base.info.gs.input_primitive);
1642       ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1643                     stp->Base.info.gs.output_primitive);
1644       ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1645                     stp->Base.info.gs.vertices_out);
1646       ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1647                     stp->Base.info.gs.invocations);
1648       break;
1649
1650    default:
1651       break;
1652    }
1653
1654    ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1655    ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1656    ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1657    GLuint attr;
1658
1659    ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1660    ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1661    uint num_inputs = 0;
1662
1663    ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1664    ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1665    uint num_outputs = 0;
1666
1667    GLint i;
1668
1669    memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1670    memset(inputMapping, 0, sizeof(inputMapping));
1671    memset(outputMapping, 0, sizeof(outputMapping));
1672    memset(&stp->state, 0, sizeof(stp->state));
1673
1674    if (prog->info.clip_distance_array_size)
1675       ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1676                     prog->info.clip_distance_array_size);
1677    if (prog->info.cull_distance_array_size)
1678       ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1679                     prog->info.cull_distance_array_size);
1680
1681    /*
1682     * Convert Mesa program inputs to TGSI input register semantics.
1683     */
1684    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1685       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1686          continue;
1687
1688       unsigned slot = num_inputs++;
1689
1690       inputMapping[attr] = slot;
1691       inputSlotToAttr[slot] = attr;
1692
1693       unsigned semantic_name, semantic_index;
1694       tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1695                                    &semantic_name, &semantic_index);
1696       input_semantic_name[slot] = semantic_name;
1697       input_semantic_index[slot] = semantic_index;
1698    }
1699
1700    /* Also add patch inputs. */
1701    for (attr = 0; attr < 32; attr++) {
1702       if (prog->info.patch_inputs_read & (1u << attr)) {
1703          GLuint slot = num_inputs++;
1704          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1705
1706          inputMapping[patch_attr] = slot;
1707          inputSlotToAttr[slot] = patch_attr;
1708          input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1709          input_semantic_index[slot] = attr;
1710       }
1711    }
1712
1713    /* initialize output semantics to defaults */
1714    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1715       output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1716       output_semantic_index[i] = 0;
1717    }
1718
1719    /*
1720     * Determine number of outputs, the (default) output register
1721     * mapping and the semantic information for each output.
1722     */
1723    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1724       if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1725          GLuint slot = num_outputs++;
1726
1727          outputMapping[attr] = slot;
1728
1729          unsigned semantic_name, semantic_index;
1730          tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1731                                       &semantic_name, &semantic_index);
1732          output_semantic_name[slot] = semantic_name;
1733          output_semantic_index[slot] = semantic_index;
1734       }
1735    }
1736
1737    /* Also add patch outputs. */
1738    for (attr = 0; attr < 32; attr++) {
1739       if (prog->info.patch_outputs_written & (1u << attr)) {
1740          GLuint slot = num_outputs++;
1741          GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1742
1743          outputMapping[patch_attr] = slot;
1744          output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1745          output_semantic_index[slot] = attr;
1746       }
1747    }
1748
1749    st_translate_program(st->ctx,
1750                         stage,
1751                         ureg,
1752                         stp->glsl_to_tgsi,
1753                         prog,
1754                         /* inputs */
1755                         num_inputs,
1756                         inputMapping,
1757                         inputSlotToAttr,
1758                         input_semantic_name,
1759                         input_semantic_index,
1760                         NULL,
1761                         /* outputs */
1762                         num_outputs,
1763                         outputMapping,
1764                         output_semantic_name,
1765                         output_semantic_index);
1766
1767    stp->state.tokens = ureg_get_tokens(ureg, NULL);
1768
1769    ureg_destroy(ureg);
1770
1771    st_translate_stream_output_info(prog);
1772
1773    st_store_ir_in_disk_cache(st, prog, false);
1774
1775    if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1776       _mesa_print_program(prog);
1777
1778    free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1779    stp->glsl_to_tgsi = NULL;
1780    return true;
1781 }
1782
1783
1784 /**
1785  * Get/create a basic program variant.
1786  */
1787 struct st_variant *
1788 st_get_common_variant(struct st_context *st,
1789                       struct st_program *prog,
1790                       const struct st_common_variant_key *key)
1791 {
1792    struct pipe_context *pipe = st->pipe;
1793    struct st_variant *v;
1794    struct pipe_shader_state state = {0};
1795    struct gl_program_parameter_list *params = prog->Base.Parameters;
1796
1797    /* Search for existing variant */
1798    for (v = prog->variants; v; v = v->next) {
1799       if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
1800          break;
1801    }
1802
1803    if (!v) {
1804       /* create new */
1805       v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
1806       if (v) {
1807          if (prog->state.type == PIPE_SHADER_IR_NIR) {
1808             bool finalize = false;
1809
1810             state.type = PIPE_SHADER_IR_NIR;
1811             state.ir.nir = get_nir_shader(st, prog);
1812
1813             if (key->clamp_color) {
1814                NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1815                finalize = true;
1816             }
1817
1818             if (key->lower_ucp) {
1819                lower_ucp(st, state.ir.nir, key->lower_ucp, params);
1820                finalize = true;
1821             }
1822
1823             state.stream_output = prog->state.stream_output;
1824
1825             if (finalize || !st->allow_st_finalize_nir_twice) {
1826                st_finalize_nir(st, &prog->Base, prog->shader_program,
1827                                state.ir.nir, true);
1828             }
1829
1830             if (ST_DEBUG & DEBUG_PRINT_IR)
1831                nir_print_shader(state.ir.nir, stderr);
1832          } else {
1833             if (key->lower_depth_clamp) {
1834                struct gl_program_parameter_list *params = prog->Base.Parameters;
1835
1836                unsigned depth_range_const =
1837                      _mesa_add_state_reference(params, depth_range_state);
1838
1839                const struct tgsi_token *tokens;
1840                tokens =
1841                      st_tgsi_lower_depth_clamp(prog->state.tokens,
1842                                                depth_range_const,
1843                                                key->clip_negative_one_to_one);
1844
1845                if (tokens != prog->state.tokens)
1846                   tgsi_free_tokens(prog->state.tokens);
1847
1848                prog->state.tokens = tokens;
1849             }
1850             state = prog->state;
1851
1852             if (ST_DEBUG & DEBUG_PRINT_IR)
1853                tgsi_dump(state.tokens, 0);
1854          }
1855          /* fill in new variant */
1856          switch (prog->Base.info.stage) {
1857          case MESA_SHADER_TESS_CTRL:
1858             v->driver_shader = pipe->create_tcs_state(pipe, &state);
1859             break;
1860          case MESA_SHADER_TESS_EVAL:
1861             v->driver_shader = pipe->create_tes_state(pipe, &state);
1862             break;
1863          case MESA_SHADER_GEOMETRY:
1864             v->driver_shader = pipe->create_gs_state(pipe, &state);
1865             break;
1866          case MESA_SHADER_COMPUTE: {
1867             struct pipe_compute_state cs = {0};
1868             cs.ir_type = state.type;
1869             cs.req_local_mem = prog->Base.info.cs.shared_size;
1870
1871             if (state.type == PIPE_SHADER_IR_NIR)
1872                cs.prog = state.ir.nir;
1873             else
1874                cs.prog = state.tokens;
1875
1876             v->driver_shader = pipe->create_compute_state(pipe, &cs);
1877             break;
1878          }
1879          default:
1880             assert(!"unhandled shader type");
1881             free(v);
1882             return NULL;
1883          }
1884
1885          st_common_variant(v)->key = *key;
1886          v->st = key->st;
1887
1888          /* insert into list */
1889          v->next = prog->variants;
1890          prog->variants = v;
1891       }
1892    }
1893
1894    return v;
1895 }
1896
1897
1898 /**
1899  * Vert/Geom/Frag programs have per-context variants.  Free all the
1900  * variants attached to the given program which match the given context.
1901  */
1902 static void
1903 destroy_program_variants(struct st_context *st, struct gl_program *target)
1904 {
1905    if (!target || target == &_mesa_DummyProgram)
1906       return;
1907
1908    struct st_program *p = st_program(target);
1909    struct st_variant *v, **prevPtr = &p->variants;
1910    bool unbound = false;
1911
1912    for (v = p->variants; v; ) {
1913       struct st_variant *next = v->next;
1914       if (v->st == st) {
1915          if (!unbound) {
1916             st_unbind_program(st, p);
1917             unbound = true;
1918          }
1919
1920          /* unlink from list */
1921          *prevPtr = next;
1922          /* destroy this variant */
1923          delete_variant(st, v, target->Target);
1924       }
1925       else {
1926          prevPtr = &v->next;
1927       }
1928       v = next;
1929    }
1930 }
1931
1932
1933 /**
1934  * Callback for _mesa_HashWalk.  Free all the shader's program variants
1935  * which match the given context.
1936  */
1937 static void
1938 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1939 {
1940    struct st_context *st = (struct st_context *) userData;
1941    struct gl_shader *shader = (struct gl_shader *) data;
1942
1943    switch (shader->Type) {
1944    case GL_SHADER_PROGRAM_MESA:
1945       {
1946          struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1947          GLuint i;
1948
1949          for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1950             if (shProg->_LinkedShaders[i])
1951                destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1952          }
1953       }
1954       break;
1955    case GL_VERTEX_SHADER:
1956    case GL_FRAGMENT_SHADER:
1957    case GL_GEOMETRY_SHADER:
1958    case GL_TESS_CONTROL_SHADER:
1959    case GL_TESS_EVALUATION_SHADER:
1960    case GL_COMPUTE_SHADER:
1961       break;
1962    default:
1963       assert(0);
1964    }
1965 }
1966
1967
1968 /**
1969  * Callback for _mesa_HashWalk.  Free all the program variants which match
1970  * the given context.
1971  */
1972 static void
1973 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1974 {
1975    struct st_context *st = (struct st_context *) userData;
1976    struct gl_program *program = (struct gl_program *) data;
1977    destroy_program_variants(st, program);
1978 }
1979
1980
1981 /**
1982  * Walk over all shaders and programs to delete any variants which
1983  * belong to the given context.
1984  * This is called during context tear-down.
1985  */
1986 void
1987 st_destroy_program_variants(struct st_context *st)
1988 {
1989    /* If shaders can be shared with other contexts, the last context will
1990     * call DeleteProgram on all shaders, releasing everything.
1991     */
1992    if (st->has_shareable_shaders)
1993       return;
1994
1995    /* ARB vert/frag program */
1996    _mesa_HashWalk(st->ctx->Shared->Programs,
1997                   destroy_program_variants_cb, st);
1998
1999    /* GLSL vert/frag/geom shaders */
2000    _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
2001                   destroy_shader_program_variants_cb, st);
2002 }
2003
2004
2005 /**
2006  * Compile one shader variant.
2007  */
2008 static void
2009 st_precompile_shader_variant(struct st_context *st,
2010                              struct gl_program *prog)
2011 {
2012    switch (prog->Target) {
2013    case GL_VERTEX_PROGRAM_ARB: {
2014       struct st_program *p = (struct st_program *)prog;
2015       struct st_common_variant_key key;
2016
2017       memset(&key, 0, sizeof(key));
2018
2019       key.st = st->has_shareable_shaders ? NULL : st;
2020       st_get_vp_variant(st, p, &key);
2021       break;
2022    }
2023
2024    case GL_FRAGMENT_PROGRAM_ARB: {
2025       struct st_program *p = (struct st_program *)prog;
2026       struct st_fp_variant_key key;
2027
2028       memset(&key, 0, sizeof(key));
2029
2030       key.st = st->has_shareable_shaders ? NULL : st;
2031       st_get_fp_variant(st, p, &key);
2032       break;
2033    }
2034
2035    case GL_TESS_CONTROL_PROGRAM_NV:
2036    case GL_TESS_EVALUATION_PROGRAM_NV:
2037    case GL_GEOMETRY_PROGRAM_NV:
2038    case GL_COMPUTE_PROGRAM_NV: {
2039       struct st_program *p = st_program(prog);
2040       struct st_common_variant_key key;
2041
2042       memset(&key, 0, sizeof(key));
2043
2044       key.st = st->has_shareable_shaders ? NULL : st;
2045       st_get_common_variant(st, p, &key);
2046       break;
2047    }
2048
2049    default:
2050       assert(0);
2051    }
2052 }
2053
2054 void
2055 st_serialize_nir(struct st_program *stp)
2056 {
2057    if (!stp->serialized_nir) {
2058       struct blob blob;
2059       size_t size;
2060
2061       blob_init(&blob);
2062       nir_serialize(&blob, stp->Base.nir, false);
2063       blob_finish_get_buffer(&blob, &stp->serialized_nir, &size);
2064       stp->serialized_nir_size = size;
2065    }
2066 }
2067
2068 void
2069 st_finalize_program(struct st_context *st, struct gl_program *prog)
2070 {
2071    if (st->current_program[prog->info.stage] == prog) {
2072       if (prog->info.stage == MESA_SHADER_VERTEX)
2073          st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
2074       else
2075          st->dirty |= ((struct st_program *)prog)->affected_states;
2076    }
2077
2078    if (prog->nir) {
2079       nir_sweep(prog->nir);
2080
2081       /* This is only needed for ARB_vp/fp programs and when the disk cache
2082        * is disabled. If the disk cache is enabled, GLSL programs are
2083        * serialized in write_nir_to_cache.
2084        */
2085       st_serialize_nir(st_program(prog));
2086    }
2087
2088    /* Create Gallium shaders now instead of on demand. */
2089    if (ST_DEBUG & DEBUG_PRECOMPILE ||
2090        st->shader_has_one_variant[prog->info.stage])
2091       st_precompile_shader_variant(st, prog);
2092 }