st/mesa: don't generate NIR for ARB_vp/fp if NIR is not preferred
src/mesa/state_tracker/st_program.c
/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 *   Brian Paul
 */


#include "main/errors.h"

#include "main/hash.h"
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/programopt.h"

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_serialize.h"
#include "draw/draw_context.h"

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_emulate.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"

#include "util/u_memory.h"

#include "st_debug.h"
#include "st_cb_bitmap.h"
#include "st_cb_drawpixels.h"
#include "st_context.h"
#include "st_tgsi_lower_depth_clamp.h"
#include "st_tgsi_lower_yuv.h"
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
#include "st_atifs_to_tgsi.h"
#include "st_nir.h"
#include "st_shader_cache.h"
#include "st_util.h"
#include "cso_cache/cso_context.h"


static void
destroy_program_variants(struct st_context *st, struct gl_program *target);

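/**
 * Helper for st_set_prog_affected_state_flags: OR the given per-resource
 * dirty-state flags into *states for each resource type (constants,
 * samplers/views, images, UBOs, SSBOs, atomics) that the program actually
 * uses, so binding the shader only triggers the state updates it needs.
 */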
static void
set_affected_state_flags(uint64_t *states,
                         struct gl_program *prog,
                         uint64_t new_constants,
                         uint64_t new_sampler_views,
                         uint64_t new_samplers,
                         uint64_t new_images,
                         uint64_t new_ubos,
                         uint64_t new_ssbos,
                         uint64_t new_atomics)
{
   if (prog->Parameters->NumParameters)
      *states |= new_constants;

   if (prog->info.num_textures)
      *states |= new_sampler_views | new_samplers;

   if (prog->info.num_images)
      *states |= new_images;

   if (prog->info.num_ubos)
      *states |= new_ubos;

   if (prog->info.num_ssbos)
      *states |= new_ssbos;

   if (prog->info.num_abos)
      *states |= new_atomics;
}

/**
 * This determines which states will be updated when the shader is bound.
 */
void
st_set_prog_affected_state_flags(struct gl_program *prog)
{
   uint64_t *states;

   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      states = &((struct st_program*)prog)->affected_states;

      *states = ST_NEW_VS_STATE |
                ST_NEW_RASTERIZER |
                ST_NEW_VERTEX_ARRAYS;

      set_affected_state_flags(states, prog,
                               ST_NEW_VS_CONSTANTS,
                               ST_NEW_VS_SAMPLER_VIEWS,
                               ST_NEW_VS_SAMPLERS,
                               ST_NEW_VS_IMAGES,
                               ST_NEW_VS_UBOS,
                               ST_NEW_VS_SSBOS,
                               ST_NEW_VS_ATOMICS);
      break;

   case MESA_SHADER_TESS_CTRL:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_TCS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_TCS_CONSTANTS,
                               ST_NEW_TCS_SAMPLER_VIEWS,
                               ST_NEW_TCS_SAMPLERS,
                               ST_NEW_TCS_IMAGES,
                               ST_NEW_TCS_UBOS,
                               ST_NEW_TCS_SSBOS,
                               ST_NEW_TCS_ATOMICS);
      break;

   case MESA_SHADER_TESS_EVAL:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_TES_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_TES_CONSTANTS,
                               ST_NEW_TES_SAMPLER_VIEWS,
                               ST_NEW_TES_SAMPLERS,
                               ST_NEW_TES_IMAGES,
                               ST_NEW_TES_UBOS,
                               ST_NEW_TES_SSBOS,
                               ST_NEW_TES_ATOMICS);
      break;

   case MESA_SHADER_GEOMETRY:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_GS_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_GS_CONSTANTS,
                               ST_NEW_GS_SAMPLER_VIEWS,
                               ST_NEW_GS_SAMPLERS,
                               ST_NEW_GS_IMAGES,
                               ST_NEW_GS_UBOS,
                               ST_NEW_GS_SSBOS,
                               ST_NEW_GS_ATOMICS);
      break;

   case MESA_SHADER_FRAGMENT:
      states = &((struct st_program*)prog)->affected_states;

      /* gl_FragCoord and glDrawPixels always use constants. */
      *states = ST_NEW_FS_STATE |
                ST_NEW_SAMPLE_SHADING |
                ST_NEW_FS_CONSTANTS;

      set_affected_state_flags(states, prog,
                               ST_NEW_FS_CONSTANTS,
                               ST_NEW_FS_SAMPLER_VIEWS,
                               ST_NEW_FS_SAMPLERS,
                               ST_NEW_FS_IMAGES,
                               ST_NEW_FS_UBOS,
                               ST_NEW_FS_SSBOS,
                               ST_NEW_FS_ATOMICS);
      break;

   case MESA_SHADER_COMPUTE:
      states = &((struct st_program*)prog)->affected_states;

      *states = ST_NEW_CS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_CS_CONSTANTS,
                               ST_NEW_CS_SAMPLER_VIEWS,
                               ST_NEW_CS_SAMPLERS,
                               ST_NEW_CS_IMAGES,
                               ST_NEW_CS_UBOS,
                               ST_NEW_CS_SSBOS,
                               ST_NEW_CS_ATOMICS);
      break;

   default:
      unreachable("unhandled shader stage");
   }
}


/**
 * Delete a shader variant.  Note the caller must unlink the variant from
 * the linked list.
 */
static void
delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
{
   if (v->driver_shader) {
      if (target == GL_VERTEX_PROGRAM_ARB &&
          ((struct st_common_variant*)v)->key.is_draw_shader) {
         /* Draw shader. */
         draw_delete_vertex_shader(st->draw, v->driver_shader);
      } else if (st->has_shareable_shaders || v->st == st) {
         /* The shader's context matches the calling context, or we
          * don't care.
          */
         switch (target) {
         case GL_VERTEX_PROGRAM_ARB:
            st->pipe->delete_vs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_CONTROL_PROGRAM_NV:
            st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_EVALUATION_PROGRAM_NV:
            st->pipe->delete_tes_state(st->pipe, v->driver_shader);
            break;
         case GL_GEOMETRY_PROGRAM_NV:
            st->pipe->delete_gs_state(st->pipe, v->driver_shader);
            break;
         case GL_FRAGMENT_PROGRAM_ARB:
            st->pipe->delete_fs_state(st->pipe, v->driver_shader);
            break;
         case GL_COMPUTE_PROGRAM_NV:
            st->pipe->delete_compute_state(st->pipe, v->driver_shader);
            break;
         default:
            unreachable("bad shader type in delete_variant");
         }
      } else {
         /* We can't delete a shader with a context different from the one
          * that created it.  Add it to the creating context's zombie list.
          */
         enum pipe_shader_type type =
            pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));

         st_save_zombie_shader(v->st, type, v->driver_shader);
      }
   }

   free(v);
}

static void
st_unbind_program(struct st_context *st, struct st_program *p)
{
   /* Unbind the shader in cso_context and re-bind in st/mesa. */
   switch (p->Base.info.stage) {
   case MESA_SHADER_VERTEX:
      cso_set_vertex_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_VS_STATE;
      break;
   case MESA_SHADER_TESS_CTRL:
      cso_set_tessctrl_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_TCS_STATE;
      break;
   case MESA_SHADER_TESS_EVAL:
      cso_set_tesseval_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_TES_STATE;
      break;
   case MESA_SHADER_GEOMETRY:
      cso_set_geometry_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_GS_STATE;
      break;
   case MESA_SHADER_FRAGMENT:
      cso_set_fragment_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_FS_STATE;
      break;
   case MESA_SHADER_COMPUTE:
      cso_set_compute_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_CS_STATE;
      break;
   default:
      unreachable("invalid shader type");
   }
}

/**
 * Free all basic program variants.
 */
void
st_release_variants(struct st_context *st, struct st_program *p)
{
   struct st_variant *v;

   /* If we are releasing shaders, unbind them first, because we don't
    * know which shaders are bound in the driver; the dirty flags set by
    * st_unbind_program make st/mesa re-bind the right shaders later.
    */
   if (p->variants)
      st_unbind_program(st, p);

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      delete_variant(st, v, p->Base.Target);
      v = next;
   }

   p->variants = NULL;

   if (p->state.tokens) {
      ureg_free_tokens(p->state.tokens);
      p->state.tokens = NULL;
   }

   /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
    * it has resulted in the driver taking ownership of the NIR.  Those
    * callers should be NULLing out the nir field in any pipe_shader_state
    * that might have this called in order to indicate that.
    *
    * GLSL IR and ARB programs will have set gl_program->nir to the same
    * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
    */
}

/**
 * Free all basic program variants and unref program.
 */
void
st_release_program(struct st_context *st, struct st_program **p)
{
   if (!*p)
      return;

   destroy_program_variants(st, &((*p)->Base));
   st_reference_prog(st, p, NULL);
}

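/**
 * NIR passes that must run before any shader variants are created:
 * access-flag optimization, splitting/lowering of variable copies,
 * lowering IO arrays to elements where required, and assigning VS input
 * locations, so that every variant starts from the same canonical form.
 */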
void
st_finalize_nir_before_variants(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_opt_access);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   if (nir->options->lower_all_io_to_temps ||
       nir->options->lower_all_io_to_elements ||
       nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   st_nir_assign_vs_in_locations(nir);
}

/**
 * Translate ARB (asm) program to NIR
 */
static nir_shader *
st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
                         gl_shader_stage stage)
{
   struct pipe_screen *screen = st->pipe->screen;
   const struct gl_shader_compiler_options *options =
      &st->ctx->Const.ShaderCompilerOptions[stage];

   /* Translate to NIR */
   nir_shader *nir = prog_to_nir(prog, options->NirOptions);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
   nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");

   NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   /* Optimise NIR */
   NIR_PASS_V(nir, nir_opt_constant_folding);
   st_nir_opts(nir);
   st_finalize_nir_before_variants(nir);

   if (st->allow_st_finalize_nir_twice)
      st_finalize_nir(st, prog, NULL, nir, true);

   nir_validate_shader(nir, "after st/glsl finalize_nir");

   return nir;
}

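/**
 * Compute the input and output slot mappings for a vertex program.
 *
 * Worked example: a program whose inputs_read contains only
 * VERT_ATTRIB_POS and VERT_ATTRIB_TEX0 gets input_to_index[POS] = 0 and
 * input_to_index[TEX0] = 1, with index_to_input holding the reverse
 * mapping.  Dual-slot (64-bit) attributes consume one extra slot, marked
 * with ST_DOUBLE_ATTRIB_PLACEHOLDER.
 */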
void
st_prepare_vertex_program(struct st_program *stp)
{
   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

   stvp->num_inputs = 0;
   memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
   memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));

   /* Determine number of inputs, the mappings between VERT_ATTRIB_x
    * and TGSI generic input indexes, plus input attrib semantic info.
    */
   for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
         stvp->input_to_index[attr] = stvp->num_inputs;
         stvp->index_to_input[stvp->num_inputs] = attr;
         stvp->num_inputs++;

         if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
            /* add placeholder for second part of a double attribute */
            stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
            stvp->num_inputs++;
         }
      }
   }
   /* pre-setup potentially unused edgeflag input */
   stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
   stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;

   /* Compute mapping of vertex program outputs to slots. */
   unsigned num_outputs = 0;
   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
         stvp->result_to_output[attr] = num_outputs++;
   }
   /* pre-setup potentially unused edgeflag output */
   stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
}

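/**
 * Translate the GL transform feedback info of a program into the
 * pipe_stream_output_info that gallium drivers consume.  Output register
 * indices are remapped with the same default outputs_written ordering
 * that the shader translation uses, so the stream output declarations
 * line up with the translated outputs.
 */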
void
st_translate_stream_output_info(struct gl_program *prog)
{
   struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
   if (!info)
      return;

   /* Determine the (default) output register mapping for each output. */
   unsigned num_outputs = 0;
   ubyte output_mapping[VARYING_SLOT_TESS_MAX];
   memset(output_mapping, 0, sizeof(output_mapping));

   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
         output_mapping[attr] = num_outputs++;
   }

   /* Translate stream output info. */
   struct pipe_stream_output_info *so_info =
      &((struct st_program*)prog)->state.stream_output;

   for (unsigned i = 0; i < info->NumOutputs; i++) {
      so_info->output[i].register_index =
         output_mapping[info->Outputs[i].OutputRegister];
      so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
      so_info->output[i].num_components = info->Outputs[i].NumComponents;
      so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
      so_info->output[i].stream = info->Outputs[i].StreamId;
   }

   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so_info->stride[i] = info->Buffers[i].Stride;
   }
   so_info->num_outputs = info->NumOutputs;
}

/**
 * Translate a vertex program.
 */
bool
st_translate_vertex_program(struct st_context *st,
                            struct st_program *stp)
{
   struct ureg_program *ureg;
   enum pipe_error error;
   unsigned num_outputs = 0;
   unsigned attr;
   ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
   ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};

   if (stp->Base.arb.IsPositionInvariant)
      _mesa_insert_mvp_code(st->ctx, &stp->Base);

   /* ARB_vp: */
   if (!stp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       */
      stp->affected_states = ST_NEW_VS_STATE |
                             ST_NEW_RASTERIZER |
                             ST_NEW_VERTEX_ARRAYS;

      if (stp->Base.Parameters->NumParameters)
         stp->affected_states |= ST_NEW_VS_CONSTANTS;

      /* Translate to NIR if preferred. */
      if (PIPE_SHADER_IR_NIR ==
          st->pipe->screen->get_shader_param(st->pipe->screen,
                                             PIPE_SHADER_VERTEX,
                                             PIPE_SHADER_CAP_PREFERRED_IR)) {
         assert(!stp->glsl_to_tgsi);

         if (stp->Base.nir)
            ralloc_free(stp->Base.nir);

         if (stp->serialized_nir) {
            free(stp->serialized_nir);
            stp->serialized_nir = NULL;
         }

         stp->state.type = PIPE_SHADER_IR_NIR;
         stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
                                                  MESA_SHADER_VERTEX);

         /* We must update stp->Base.info after translation and before
          * st_prepare_vertex_program is called, because inputs_read
          * may become outdated after NIR optimization passes.
          *
          * For ffvp/ARB_vp inputs_read is populated based
          * on declared attributes without taking their usage into
          * consideration. When creating shader variants we expect
          * that their inputs_read would match the base ones for
          * input mapping to work properly.
          */
         nir_shader_gather_info(stp->Base.nir,
                                nir_shader_get_entrypoint(stp->Base.nir));
         st_nir_assign_vs_in_locations(stp->Base.nir);
         stp->Base.info = stp->Base.nir->info;

         /* For st_draw_feedback, we need to generate TGSI too if draw doesn't
          * use LLVM.
          */
         /* TODO: Draw can't handle lowered IO. */
         if (draw_has_llvm() && !stp->Base.info.io_lowered) {
            st_prepare_vertex_program(stp);
            return true;
         }
      }
   }

   st_prepare_vertex_program(stp);

   /* Get semantic names and indices. */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
         unsigned slot = num_outputs++;
         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }
   /* pre-setup potentially unused edgeflag output */
   output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
   output_semantic_index[num_outputs] = 0;

   ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
   if (ureg == NULL)
      return false;

   if (stp->Base.info.clip_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
                    stp->Base.info.clip_distance_array_size);
   if (stp->Base.info.cull_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
                    stp->Base.info.cull_distance_array_size);

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stp->Base);
      _mesa_print_program_parameters(st->ctx, &stp->Base);
      debug_printf("\n");
   }

   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

   if (stp->glsl_to_tgsi) {
      error = st_translate_program(st->ctx,
                                   PIPE_SHADER_VERTEX,
                                   ureg,
                                   stp->glsl_to_tgsi,
                                   &stp->Base,
                                   /* inputs */
                                   stvp->num_inputs,
                                   stvp->input_to_index,
                                   NULL, /* inputSlotToAttr */
                                   NULL, /* input semantic name */
                                   NULL, /* input semantic index */
                                   NULL, /* interp mode */
                                   /* outputs */
                                   num_outputs,
                                   stvp->result_to_output,
                                   output_semantic_name,
                                   output_semantic_index);

      st_translate_stream_output_info(&stp->Base);

      free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   } else
      error = st_translate_mesa_program(st->ctx,
                                        PIPE_SHADER_VERTEX,
                                        ureg,
                                        &stp->Base,
                                        /* inputs */
                                        stvp->num_inputs,
                                        stvp->input_to_index,
                                        NULL, /* input semantic name */
                                        NULL, /* input semantic index */
                                        NULL,
                                        /* outputs */
                                        num_outputs,
                                        stvp->result_to_output,
                                        output_semantic_name,
                                        output_semantic_index);

   if (error) {
      debug_printf("%s: failed to translate Mesa program:\n", __func__);
      _mesa_print_program(&stp->Base);
      debug_assert(0);
      return false;
   }

   stp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   if (stp->glsl_to_tgsi) {
      stp->glsl_to_tgsi = NULL;
      st_store_ir_in_disk_cache(st, &stp->Base, false);
   }

   return stp->state.tokens != NULL;
}

static struct nir_shader *
get_nir_shader(struct st_context *st, struct st_program *stp)
{
   if (stp->Base.nir) {
      nir_shader *nir = stp->Base.nir;

      /* The first shader variant takes ownership of NIR, so that there is
       * no cloning. Additional shader variants are always generated from
       * serialized NIR to save memory.
       */
      stp->Base.nir = NULL;
      assert(stp->serialized_nir && stp->serialized_nir_size);
      return nir;
   }

   struct blob_reader blob_reader;
   const struct nir_shader_compiler_options *options =
      st->ctx->Const.ShaderCompilerOptions[stp->Base.info.stage].NirOptions;

   blob_reader_init(&blob_reader, stp->serialized_nir, stp->serialized_nir_size);
   return nir_deserialize(NULL, options, &blob_reader);
}

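/**
 * Lower user clip planes.  If the shader already writes gl_ClipDistance,
 * writes for planes outside ucp_enables are disabled; otherwise
 * clip-plane uniforms are added to the parameter list (eye-space
 * STATE_CLIPPLANE when a GLSL vertex shader is bound, clip-space
 * STATE_CLIP_INTERNAL otherwise) and nir_lower_clip_vs/nir_lower_clip_gs
 * emits the distance writes.
 */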
static void
lower_ucp(struct st_context *st,
          struct nir_shader *nir,
          unsigned ucp_enables,
          struct gl_program_parameter_list *params)
{
   if (nir->info.outputs_written & VARYING_BIT_CLIP_DIST0)
      NIR_PASS_V(nir, nir_lower_clip_disable, ucp_enables);
   else {
      struct pipe_screen *screen = st->pipe->screen;
      bool can_compact = screen->get_param(screen,
                                           PIPE_CAP_NIR_COMPACT_ARRAYS);
      bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;

      gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
      for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
         if (use_eye) {
            clipplane_state[i][0] = STATE_CLIPPLANE;
            clipplane_state[i][1] = i;
         } else {
            clipplane_state[i][0] = STATE_INTERNAL;
            clipplane_state[i][1] = STATE_CLIP_INTERNAL;
            clipplane_state[i][2] = i;
         }
         _mesa_add_state_reference(params, clipplane_state[i]);
      }

      if (nir->info.stage == MESA_SHADER_VERTEX) {
         NIR_PASS_V(nir, nir_lower_clip_vs, ucp_enables,
                    true, can_compact, clipplane_state);
      } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
         NIR_PASS_V(nir, nir_lower_clip_gs, ucp_enables,
                    can_compact, clipplane_state);
      }

      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                 nir_shader_get_entrypoint(nir), true, false);
      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   }
}

static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };

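/**
 * Create a new vertex program variant, applying the lowerings requested
 * by the key (color clamping, edge-flag passthrough, point size, user
 * clip planes, depth clamp) to the NIR or TGSI representation before
 * handing it to the driver or, when key->is_draw_shader is set, to the
 * draw module.
 */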
static struct st_common_variant *
st_create_vp_variant(struct st_context *st,
                     struct st_program *stvp,
                     const struct st_common_variant_key *key)
{
   struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
   struct pipe_context *pipe = st->pipe;
   struct pipe_shader_state state = {0};

   static const gl_state_index16 point_size_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
   struct gl_program_parameter_list *params = stvp->Base.Parameters;

   vpv->key = *key;

   state.stream_output = stvp->state.stream_output;

   if (stvp->state.type == PIPE_SHADER_IR_NIR &&
       (!key->is_draw_shader ||
        /* TODO: Draw can't handle lowered IO. */
        (draw_has_llvm() && !stvp->Base.info.io_lowered))) {
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = get_nir_shader(st, stvp);
      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }
      if (key->passthrough_edgeflags) {
         NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
         finalize = true;
      }

      if (key->lower_point_size) {
         _mesa_add_state_reference(params, point_size_state);
         NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
                    point_size_state);
         finalize = true;
      }

      if (key->lower_ucp) {
         lower_ucp(st, state.ir.nir, key->lower_ucp, params);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
                         true);

         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));
      }

      if (ST_DEBUG & DEBUG_PRINT_IR)
         nir_print_shader(state.ir.nir, stderr);

      if (key->is_draw_shader)
         vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
      else
         vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);

      return vpv;
   }

   state.type = PIPE_SHADER_IR_TGSI;
   state.tokens = tgsi_dup_tokens(stvp->state.tokens);

   /* Emulate features. */
   if (key->clamp_color || key->passthrough_edgeflags) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const =
         _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
                                         key->clip_negative_one_to_one);
      if (tokens != state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   if (key->is_draw_shader)
      vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
   else
      vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);

   if (state.tokens) {
      tgsi_free_tokens(state.tokens);
   }

   return vpv;
}


/**
 * Find/create a vertex program variant.
 */
struct st_common_variant *
st_get_vp_variant(struct st_context *st,
                  struct st_program *stp,
                  const struct st_common_variant_key *key)
{
   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
   struct st_common_variant *vpv;

   /* Search for existing variant */
   for (vpv = st_common_variant(stp->variants); vpv;
        vpv = st_common_variant(vpv->base.next)) {
      if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!vpv) {
      /* create now */
      vpv = st_create_vp_variant(st, stp, key);
      if (vpv) {
         vpv->base.st = key->st;

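         /* Record which vertex attributes this variant reads; edge-flag
          * passthrough adds one extra input at the end of the mapping.
          */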
         unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
         for (unsigned index = 0; index < num_inputs; ++index) {
            unsigned attr = stvp->index_to_input[index];
            if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
               continue;
            vpv->vert_attrib_mask |= 1u << attr;
         }

         /* insert into list */
         vpv->base.next = stp->variants;
         stp->variants = &vpv->base;
      }
   }

   return vpv;
}


/**
 * Translate a Mesa fragment shader into a TGSI shader.
 */
bool
st_translate_fragment_program(struct st_context *st,
                              struct st_program *stfp)
{
   /* Non-GLSL programs: */
   if (!stfp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
      if (st->ctx->Const.GLSLFragCoordIsSysVal)
         _mesa_program_fragment_position_to_sysval(&stfp->Base);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       *
       * fragment.position and glDrawPixels always use constants.
       */
      stfp->affected_states = ST_NEW_FS_STATE |
                              ST_NEW_SAMPLE_SHADING |
                              ST_NEW_FS_CONSTANTS;

      if (stfp->ati_fs) {
         /* Just set them for ATI_fs unconditionally. */
         stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                  ST_NEW_FS_SAMPLERS;
      } else {
         /* ARB_fp */
         if (stfp->Base.SamplersUsed)
            stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                     ST_NEW_FS_SAMPLERS;
      }

      /* Translate to NIR. */
      if (!stfp->ati_fs &&
          PIPE_SHADER_IR_NIR ==
          st->pipe->screen->get_shader_param(st->pipe->screen,
                                             PIPE_SHADER_FRAGMENT,
                                             PIPE_SHADER_CAP_PREFERRED_IR)) {
         nir_shader *nir =
            st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);

         if (stfp->Base.nir)
            ralloc_free(stfp->Base.nir);
         if (stfp->serialized_nir) {
            free(stfp->serialized_nir);
            stfp->serialized_nir = NULL;
         }
         stfp->state.type = PIPE_SHADER_IR_NIR;
         stfp->Base.nir = nir;
         return true;
      }
   }

   ubyte outputMapping[2 * FRAG_RESULT_MAX];
   ubyte inputMapping[VARYING_SLOT_MAX];
   ubyte inputSlotToAttr[VARYING_SLOT_MAX];
   ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
   GLuint attr;
   GLbitfield64 inputsRead;
   struct ureg_program *ureg;

   GLboolean write_all = GL_FALSE;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint fs_num_inputs = 0;

   ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint fs_num_outputs = 0;

   memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   inputsRead = stfp->Base.info.inputs_read;
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
         const GLuint slot = fs_num_inputs++;

         inputMapping[attr] = slot;
         inputSlotToAttr[slot] = attr;

         switch (attr) {
         case VARYING_SLOT_POS:
            input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            break;
         case VARYING_SLOT_COL0:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 0;
            interpMode[slot] = stfp->glsl_to_tgsi ?
               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
            break;
         case VARYING_SLOT_COL1:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 1;
            interpMode[slot] = stfp->glsl_to_tgsi ?
               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
            break;
         case VARYING_SLOT_FOGC:
            input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_FACE:
            input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_PRIMITIVE_ID:
            input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_LAYER:
            input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_VIEWPORT:
            input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_CLIP_DIST0:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CLIP_DIST1:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 1;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CULL_DIST0:
         case VARYING_SLOT_CULL_DIST1:
            /* these should have been lowered by GLSL */
            assert(0);
            break;
            /* In most cases, there is nothing special about these
             * inputs, so adopt a convention to use the generic
             * semantic name and the mesa VARYING_SLOT_ number as the
             * index.
             *
             * All that is required is that the vertex shader labels
             * its own outputs similarly, and that the vertex shader
             * generates at least every output required by the
             * fragment shader plus fixed-function hardware (such as
             * BFC).
             *
             * However, some drivers may need us to identify the PNTC and TEXi
             * varyings if, for example, their capability to replace them with
             * sprite coordinates is limited.
             */
         case VARYING_SLOT_PNTC:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
               input_semantic_index[slot] = 0;
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
               break;
            }
            /* fall through */
         case VARYING_SLOT_TEX0:
         case VARYING_SLOT_TEX1:
         case VARYING_SLOT_TEX2:
         case VARYING_SLOT_TEX3:
         case VARYING_SLOT_TEX4:
         case VARYING_SLOT_TEX5:
         case VARYING_SLOT_TEX6:
         case VARYING_SLOT_TEX7:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
               input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
               interpMode[slot] = stfp->glsl_to_tgsi ?
                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
               break;
            }
            /* fall through */
         case VARYING_SLOT_VAR0:
         default:
            /* Semantic indices should be zero-based because drivers may choose
             * to assign a fixed slot determined by that index.
             * This is useful because ARB_separate_shader_objects uses location
             * qualifiers for linkage, and if the semantic index corresponds to
             * these locations, linkage passes in the driver become unnecessary.
             *
             * If needs_texcoord_semantic is true, no semantic indices will be
             * consumed for the TEXi varyings, and we can base the locations of
             * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
             */
            assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
                   (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
            input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
            if (attr == VARYING_SLOT_PNTC)
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            else {
               interpMode[slot] = stfp->glsl_to_tgsi ?
                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
            }
            break;
         }
      }
      else {
         inputMapping[attr] = -1;
      }
   }

   /*
    * Semantics and mapping for outputs
    */
   GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;

   /* if z is written, emit that first */
   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
   }

   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
   }

   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
   }

   /* handle remaining outputs (color) */
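   /* Slots with attr >= FRAG_RESULT_MAX walk SecondaryOutputsWritten,
    * which holds the secondary color outputs used for dual-source
    * blending; those get TGSI_SEMANTIC_COLOR with semantic index 1.
    */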
   for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
      const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
         stfp->Base.SecondaryOutputsWritten;
      const unsigned loc = attr % FRAG_RESULT_MAX;

      if (written & BITFIELD64_BIT(loc)) {
         switch (loc) {
         case FRAG_RESULT_DEPTH:
         case FRAG_RESULT_STENCIL:
         case FRAG_RESULT_SAMPLE_MASK:
            /* handled above */
            assert(0);
            break;
         case FRAG_RESULT_COLOR:
            write_all = GL_TRUE; /* fallthrough */
         default: {
            int index;
            assert(loc == FRAG_RESULT_COLOR ||
                   (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));

            index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);

            if (attr >= FRAG_RESULT_MAX) {
               /* Secondary color for dual source blending. */
               assert(index == 0);
               index++;
            }

            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
            fs_output_semantic_index[fs_num_outputs] = index;
            outputMapping[attr] = fs_num_outputs;
            break;
         }
         }

         fs_num_outputs++;
      }
   }

   ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
   if (ureg == NULL)
      return false;

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stfp->Base);
      _mesa_print_program_parameters(st->ctx, &stfp->Base);
      debug_printf("\n");
   }
   if (write_all == GL_TRUE)
      ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);

   if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
      switch (stfp->Base.info.fs.depth_layout) {
      case FRAG_DEPTH_LAYOUT_ANY:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_ANY);
         break;
      case FRAG_DEPTH_LAYOUT_GREATER:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_GREATER);
         break;
      case FRAG_DEPTH_LAYOUT_LESS:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_LESS);
         break;
      case FRAG_DEPTH_LAYOUT_UNCHANGED:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
         break;
      default:
         assert(0);
      }
   }

   if (stfp->glsl_to_tgsi) {
      st_translate_program(st->ctx,
                           PIPE_SHADER_FRAGMENT,
                           ureg,
                           stfp->glsl_to_tgsi,
                           &stfp->Base,
                           /* inputs */
                           fs_num_inputs,
                           inputMapping,
                           inputSlotToAttr,
                           input_semantic_name,
                           input_semantic_index,
                           interpMode,
                           /* outputs */
                           fs_num_outputs,
                           outputMapping,
                           fs_output_semantic_name,
                           fs_output_semantic_index);

      free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
   } else if (stfp->ati_fs)
      st_translate_atifs_program(ureg,
                                 stfp->ati_fs,
                                 &stfp->Base,
                                 /* inputs */
                                 fs_num_inputs,
                                 inputMapping,
                                 input_semantic_name,
                                 input_semantic_index,
                                 interpMode,
                                 /* outputs */
                                 fs_num_outputs,
                                 outputMapping,
                                 fs_output_semantic_name,
                                 fs_output_semantic_index);
   else
      st_translate_mesa_program(st->ctx,
                                PIPE_SHADER_FRAGMENT,
                                ureg,
                                &stfp->Base,
                                /* inputs */
                                fs_num_inputs,
                                inputMapping,
                                input_semantic_name,
                                input_semantic_index,
                                interpMode,
                                /* outputs */
                                fs_num_outputs,
                                outputMapping,
                                fs_output_semantic_name,
                                fs_output_semantic_index);

   stfp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   if (stfp->glsl_to_tgsi) {
      stfp->glsl_to_tgsi = NULL;
      st_store_ir_in_disk_cache(st, &stfp->Base, false);
   }

   return stfp->state.tokens != NULL;
}

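/**
 * Create a new fragment program variant, specializing the shader for the
 * fixed-function features requested by the key: color clamping, flat
 * shading, alpha test, two-sided color, per-sample shading, glBitmap,
 * glDrawPixels and external (YUV) samplers.
 */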
static struct st_fp_variant *
st_create_fp_variant(struct st_context *st,
                     struct st_program *stfp,
                     const struct st_fp_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = stfp->Base.Parameters;
   static const gl_state_index16 texcoord_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
   static const gl_state_index16 scale_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_PT_SCALE };
   static const gl_state_index16 bias_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_PT_BIAS };
   static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_ALPHA_REF };

   if (!variant)
      return NULL;

   if (stfp->state.type == PIPE_SHADER_IR_NIR) {
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = get_nir_shader(st, stfp);

      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }

      if (key->lower_flatshade) {
         NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
         finalize = true;
      }

      if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
         _mesa_add_state_reference(params, alpha_ref_state);
         NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
                    false, alpha_ref_state);
         finalize = true;
      }

      if (key->lower_two_sided_color) {
         bool face_sysval = st->ctx->Const.GLSLFrontFacingIsSysVal;
         NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color, face_sysval);
         finalize = true;
      }

      if (key->persample_shading) {
         nir_shader *shader = state.ir.nir;
         nir_foreach_shader_in_variable(var, shader)
            var->data.sample = true;
         finalize = true;
      }

      assert(!(key->bitmap && key->drawpixels));

      /* glBitmap */
      if (key->bitmap) {
         nir_lower_bitmap_options options = {0};

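         /* Claim the first sampler slot the program doesn't already use. */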
         variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
         options.sampler = variant->bitmap_sampler;
         options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;

         NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
         finalize = true;
      }

      /* glDrawPixels (color only) */
      if (key->drawpixels) {
         nir_lower_drawpixels_options options = {{0}};
         unsigned samplers_used = stfp->Base.SamplersUsed;

         /* Find the first unused slot. */
         variant->drawpix_sampler = ffs(~samplers_used) - 1;
         options.drawpix_sampler = variant->drawpix_sampler;
         samplers_used |= (1 << variant->drawpix_sampler);

         options.pixel_maps = key->pixelMaps;
         if (key->pixelMaps) {
            variant->pixelmap_sampler = ffs(~samplers_used) - 1;
            options.pixelmap_sampler = variant->pixelmap_sampler;
         }

         options.scale_and_bias = key->scaleAndBias;
         if (key->scaleAndBias) {
            _mesa_add_state_reference(params, scale_state);
            memcpy(options.scale_state_tokens, scale_state,
                   sizeof(options.scale_state_tokens));
            _mesa_add_state_reference(params, bias_state);
            memcpy(options.bias_state_tokens, bias_state,
                   sizeof(options.bias_state_tokens));
         }

         _mesa_add_state_reference(params, texcoord_state);
         memcpy(options.texcoord_state_tokens, texcoord_state,
                sizeof(options.texcoord_state_tokens));

         NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
         finalize = true;
      }

      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv)) {

         st_nir_lower_samplers(pipe->screen, state.ir.nir,
                               stfp->shader_program, &stfp->Base);

         nir_lower_tex_options options = {0};
         options.lower_y_uv_external = key->external.lower_nv12;
         options.lower_y_u_v_external = key->external.lower_iyuv;
         options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
         options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
         options.lower_ayuv_external = key->external.lower_ayuv;
         options.lower_xyuv_external = key->external.lower_xyuv;
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
                         false);
      }

      /* This pass needs to happen *after* nir_lower_sampler */
      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv)) {
         NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
                    ~stfp->Base.SamplersUsed,
                    key->external.lower_nv12 || key->external.lower_xy_uxvx ||
                       key->external.lower_yx_xuxv,
                    key->external.lower_iyuv);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));

         struct pipe_screen *screen = pipe->screen;
         if (screen->finalize_nir)
            screen->finalize_nir(screen, state.ir.nir, false);
      }

      if (ST_DEBUG & DEBUG_PRINT_IR)
         nir_print_shader(state.ir.nir, stderr);

      variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
      variant->key = *key;

      return variant;
   }

   state.tokens = stfp->state.tokens;

   assert(!(key->bitmap && key->drawpixels));

   /* Fix texture targets and add fog for ATI_fs */
   if (stfp->ati_fs) {
      const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);

      if (tokens)
         state.tokens = tokens;
      else
         fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
   }

   /* Emulate features. */
   if (key->clamp_color || key->persample_shading) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
   }

   /* glBitmap */
   if (key->bitmap) {
      const struct tgsi_token *tokens;

      variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      tokens = st_get_bitmap_shader(state.tokens,
                                    st->internal_target,
                                    variant->bitmap_sampler,
                                    st->needs_texcoord_semantic,
                                    st->bitmap.tex_format ==
                                    PIPE_FORMAT_R8_UNORM);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
   }

   /* glDrawPixels (color only) */
   if (key->drawpixels) {
      const struct tgsi_token *tokens;
      unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;

      /* Find the first unused slot. */
      variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      if (key->pixelMaps) {
         unsigned samplers_used = stfp->Base.SamplersUsed |
                                  (1 << variant->drawpix_sampler);

         variant->pixelmap_sampler = ffs(~samplers_used) - 1;
      }

      if (key->scaleAndBias) {
         scale_const = _mesa_add_state_reference(params, scale_state);
         bias_const = _mesa_add_state_reference(params, bias_state);
      }

      texcoord_const = _mesa_add_state_reference(params, texcoord_state);

      tokens = st_get_drawpix_shader(state.tokens,
                                     st->needs_texcoord_semantic,
                                     key->scaleAndBias, scale_const,
                                     bias_const, key->pixelMaps,
                                     variant->drawpix_sampler,
                                     variant->pixelmap_sampler,
                                     texcoord_const, st->internal_target);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
   }

   if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
      const struct tgsi_token *tokens;

      /* samplers inserted would conflict, but this should be impossible: */
      assert(!(key->bitmap || key->drawpixels));

      tokens = st_tgsi_lower_yuv(state.tokens,
                                 ~stfp->Base.SamplersUsed,
                                 key->external.lower_nv12 ||
                                    key->external.lower_xy_uxvx ||
                                    key->external.lower_yx_xuxv,
                                 key->external.lower_iyuv);
      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
      if (state.tokens != stfp->state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   /* fill in variant */
   variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
   variant->key = *key;

   if (state.tokens != stfp->state.tokens)
      tgsi_free_tokens(state.tokens);
   return variant;
}

/**
 * Find/create a fragment program variant.
 */
struct st_fp_variant *
st_get_fp_variant(struct st_context *st,
                  struct st_program *stfp,
                  const struct st_fp_variant_key *key)
{
   struct st_fp_variant *fpv;

   /* Search for existing variant */
   for (fpv = st_fp_variant(stfp->variants); fpv;
        fpv = st_fp_variant(fpv->base.next)) {
      if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!fpv) {
      /* create new */
      fpv = st_create_fp_variant(st, stfp, key);
      if (fpv) {
         fpv->base.st = key->st;

         if (key->bitmap || key->drawpixels) {
            /* Regular variants should always come before the
             * bitmap & drawpixels variants, (unless there
             * are no regular variants) so that
             * st_update_fp can take a fast path when
             * shader_has_one_variant is set.
             */
            if (!stfp->variants) {
               stfp->variants = &fpv->base;
            } else {
               /* insert into list after the first one */
               fpv->base.next = stfp->variants->next;
               stfp->variants->next = &fpv->base;
            }
         } else {
            /* insert into list */
            fpv->base.next = stfp->variants;
            stfp->variants = &fpv->base;
         }
      }
   }

   return fpv;
}

/**
 * Translate a program. This is common code for geometry and tessellation
 * shaders.
 */
bool
st_translate_common_program(struct st_context *st,
                            struct st_program *stp)
{
   struct gl_program *prog = &stp->Base;
   enum pipe_shader_type stage =
      pipe_shader_type_from_mesa(stp->Base.info.stage);
   struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);

   if (ureg == NULL)
      return false;

   switch (stage) {
   case PIPE_SHADER_TESS_CTRL:
      ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
                    stp->Base.info.tess.tcs_vertices_out);
      break;

   case PIPE_SHADER_TESS_EVAL:
      if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
         ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
      else
         ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
                       stp->Base.info.tess.primitive_mode);

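      /* gl_tess_spacing and pipe_tess_spacing enumerate the spacing modes
       * in different orders; (gl + 1) % 3 converts between them, as the
       * static asserts below verify for each value.
       */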
1626 STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
1627 STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
1628 PIPE_TESS_SPACING_FRACTIONAL_ODD);
1629 STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
1630 PIPE_TESS_SPACING_FRACTIONAL_EVEN);
1631
1632 ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
1633 (stp->Base.info.tess.spacing + 1) % 3);
1634
1635 ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
1636 !stp->Base.info.tess.ccw);
1637 ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
1638 stp->Base.info.tess.point_mode);
1639 break;
1640
1641 case PIPE_SHADER_GEOMETRY:
1642 ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
1643 stp->Base.info.gs.input_primitive);
1644 ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
1645 stp->Base.info.gs.output_primitive);
1646 ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1647 stp->Base.info.gs.vertices_out);
1648 ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
1649 stp->Base.info.gs.invocations);
1650 break;
1651
1652 default:
1653 break;
1654 }
1655
1656 ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
1657 ubyte inputMapping[VARYING_SLOT_TESS_MAX];
1658 ubyte outputMapping[VARYING_SLOT_TESS_MAX];
1659 GLuint attr;
1660
1661 ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
1662 ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
1663 uint num_inputs = 0;
1664
1665 ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
1666 ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
1667 uint num_outputs = 0;
1668
1669 GLint i;
1670
1671 memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
1672 memset(inputMapping, 0, sizeof(inputMapping));
1673 memset(outputMapping, 0, sizeof(outputMapping));
1674 memset(&stp->state, 0, sizeof(stp->state));
1675
1676 if (prog->info.clip_distance_array_size)
1677 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
1678 prog->info.clip_distance_array_size);
1679 if (prog->info.cull_distance_array_size)
1680 ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
1681 prog->info.cull_distance_array_size);
1682
1683 /*
1684 * Convert Mesa program inputs to TGSI input register semantics.
1685 */
1686 for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1687 if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
1688 continue;
1689
1690 unsigned slot = num_inputs++;
1691
1692 inputMapping[attr] = slot;
1693 inputSlotToAttr[slot] = attr;
1694
1695 unsigned semantic_name, semantic_index;
1696 tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1697 &semantic_name, &semantic_index);
1698 input_semantic_name[slot] = semantic_name;
1699 input_semantic_index[slot] = semantic_index;
1700 }
1701
1702 /* Also add patch inputs. */
1703 for (attr = 0; attr < 32; attr++) {
1704 if (prog->info.patch_inputs_read & (1u << attr)) {
1705 GLuint slot = num_inputs++;
1706 GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1707
1708 inputMapping[patch_attr] = slot;
1709 inputSlotToAttr[slot] = patch_attr;
1710 input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1711 input_semantic_index[slot] = attr;
1712 }
1713 }
1714
1715 /* initialize output semantics to defaults */
1716 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1717 output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
1718 output_semantic_index[i] = 0;
1719 }
1720
1721 /*
1722 * Determine number of outputs, the (default) output register
1723 * mapping and the semantic information for each output.
1724 */
1725 for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
1726 if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
1727 GLuint slot = num_outputs++;
1728
1729 outputMapping[attr] = slot;
1730
1731 unsigned semantic_name, semantic_index;
1732 tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
1733 &semantic_name, &semantic_index);
1734 output_semantic_name[slot] = semantic_name;
1735 output_semantic_index[slot] = semantic_index;
1736 }
1737 }
1738
1739 /* Also add patch outputs. */
1740 for (attr = 0; attr < 32; attr++) {
1741 if (prog->info.patch_outputs_written & (1u << attr)) {
1742 GLuint slot = num_outputs++;
1743 GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;
1744
1745 outputMapping[patch_attr] = slot;
1746 output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
1747 output_semantic_index[slot] = attr;
1748 }
1749 }
1750
1751 st_translate_program(st->ctx,
1752 stage,
1753 ureg,
1754 stp->glsl_to_tgsi,
1755 prog,
1756 /* inputs */
1757 num_inputs,
1758 inputMapping,
1759 inputSlotToAttr,
1760 input_semantic_name,
1761 input_semantic_index,
1762 NULL,
1763 /* outputs */
1764 num_outputs,
1765 outputMapping,
1766 output_semantic_name,
1767 output_semantic_index);
1768
1769 stp->state.tokens = ureg_get_tokens(ureg, NULL);
1770
1771 ureg_destroy(ureg);
1772
1773 st_translate_stream_output_info(prog);
1774
1775 st_store_ir_in_disk_cache(st, prog, false);
1776
1777 if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
1778 _mesa_print_program(prog);
1779
1780 free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
1781 stp->glsl_to_tgsi = NULL;
1782 return true;
1783 }
1784
1785
1786 /**
1787 * Get/create a basic program variant.
1788 */
1789 struct st_variant *
1790 st_get_common_variant(struct st_context *st,
1791 struct st_program *prog,
1792 const struct st_common_variant_key *key)
1793 {
1794 struct pipe_context *pipe = st->pipe;
1795 struct st_variant *v;
1796 struct pipe_shader_state state = {0};
1797 struct gl_program_parameter_list *params = prog->Base.Parameters;
1798
1799 /* Search for existing variant */
1800 for (v = prog->variants; v; v = v->next) {
1801 if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
1802 break;
1803 }
1804
1805 if (!v) {
1806 /* create new */
1807 v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
1808 if (v) {
1809 if (prog->state.type == PIPE_SHADER_IR_NIR) {
1810 bool finalize = false;
1811
1812 state.type = PIPE_SHADER_IR_NIR;
1813 state.ir.nir = get_nir_shader(st, prog);
1814
1815 if (key->clamp_color) {
1816 NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
1817 finalize = true;
1818 }
1819
1820 if (key->lower_ucp) {
1821 lower_ucp(st, state.ir.nir, key->lower_ucp, params);
1822 finalize = true;
1823 }
1824
1825 state.stream_output = prog->state.stream_output;
1826
1827 if (finalize || !st->allow_st_finalize_nir_twice) {
1828 st_finalize_nir(st, &prog->Base, prog->shader_program,
1829 state.ir.nir, true);
1830 }
1831
1832 if (ST_DEBUG & DEBUG_PRINT_IR)
1833 nir_print_shader(state.ir.nir, stderr);
1834 } else {
1835 if (key->lower_depth_clamp) {
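            /* Add a constant that the state tracker keeps in sync with
             * gl_DepthRange; the lowering pass reads the range from it.
             */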
1838 unsigned depth_range_const =
1839 _mesa_add_state_reference(params, depth_range_state);
1840
1841 const struct tgsi_token *tokens;
1842 tokens =
1843 st_tgsi_lower_depth_clamp(prog->state.tokens,
1844 depth_range_const,
1845 key->clip_negative_one_to_one);
1846
1847 if (tokens != prog->state.tokens)
1848 tgsi_free_tokens(prog->state.tokens);
1849
1850 prog->state.tokens = tokens;
1851 }
1852 state = prog->state;
1853
1854 if (ST_DEBUG & DEBUG_PRINT_IR)
1855 tgsi_dump(state.tokens, 0);
1856 }
1857 /* fill in new variant */
1858 switch (prog->Base.info.stage) {
1859 case MESA_SHADER_TESS_CTRL:
1860 v->driver_shader = pipe->create_tcs_state(pipe, &state);
1861 break;
1862 case MESA_SHADER_TESS_EVAL:
1863 v->driver_shader = pipe->create_tes_state(pipe, &state);
1864 break;
1865 case MESA_SHADER_GEOMETRY:
1866 v->driver_shader = pipe->create_gs_state(pipe, &state);
1867 break;
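         /* Compute shaders are created through pipe_compute_state rather
          * than pipe_shader_state, so repackage the IR accordingly.
          */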
1868 case MESA_SHADER_COMPUTE: {
1869 struct pipe_compute_state cs = {0};
1870 cs.ir_type = state.type;
1871 cs.req_local_mem = prog->Base.info.cs.shared_size;
1872
1873 if (state.type == PIPE_SHADER_IR_NIR)
1874 cs.prog = state.ir.nir;
1875 else
1876 cs.prog = state.tokens;
1877
1878 v->driver_shader = pipe->create_compute_state(pipe, &cs);
1879 break;
1880 }
1881 default:
1882 assert(!"unhandled shader type");
1883 free(v);
1884 return NULL;
1885 }
1886
1887 st_common_variant(v)->key = *key;
1888 v->st = key->st;
1889
1890 /* insert into list */
1891 v->next = prog->variants;
1892 prog->variants = v;
1893 }
1894 }
1895
1896 return v;
1897 }
1898
1899
1900 /**
1901  * Shader programs have per-context variants.  Free all the variants
1902  * attached to the given program which match the given context.
1903 */
1904 static void
1905 destroy_program_variants(struct st_context *st, struct gl_program *target)
1906 {
1907 if (!target || target == &_mesa_DummyProgram)
1908 return;
1909
1910 struct st_program *p = st_program(target);
1911 struct st_variant *v, **prevPtr = &p->variants;
1912 bool unbound = false;
1913
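   /* Walk the singly linked variant list; prevPtr tracks the link that
    * must be rewritten when the current variant is unlinked and freed.
    */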
1914 for (v = p->variants; v; ) {
1915 struct st_variant *next = v->next;
1916 if (v->st == st) {
1917 if (!unbound) {
1918 st_unbind_program(st, p);
1919 unbound = true;
1920 }
1921
1922 /* unlink from list */
1923 *prevPtr = next;
1924 /* destroy this variant */
1925 delete_variant(st, v, target->Target);
1926 }
1927 else {
1928 prevPtr = &v->next;
1929 }
1930 v = next;
1931 }
1932 }
1933
1934
1935 /**
1936 * Callback for _mesa_HashWalk. Free all the shader's program variants
1937 * which match the given context.
1938 */
1939 static void
1940 destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
1941 {
1942 struct st_context *st = (struct st_context *) userData;
1943 struct gl_shader *shader = (struct gl_shader *) data;
1944
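   /* The ShaderObjects hash table stores both gl_shader and
    * gl_shader_program objects.  Both start with a Type field, which is
    * what makes the cast above and the switch below safe.
    */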
1945 switch (shader->Type) {
1946 case GL_SHADER_PROGRAM_MESA:
1947 {
1948 struct gl_shader_program *shProg = (struct gl_shader_program *) data;
1949 GLuint i;
1950
1951 for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
1952 if (shProg->_LinkedShaders[i])
1953 destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
1954 }
1955 }
1956 break;
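   /* Plain shader objects have no program variants of their own; only
    * linked programs do.
    */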
1957 case GL_VERTEX_SHADER:
1958 case GL_FRAGMENT_SHADER:
1959 case GL_GEOMETRY_SHADER:
1960 case GL_TESS_CONTROL_SHADER:
1961 case GL_TESS_EVALUATION_SHADER:
1962 case GL_COMPUTE_SHADER:
1963 break;
1964 default:
1965 assert(0);
1966 }
1967 }
1968
1969
1970 /**
1971 * Callback for _mesa_HashWalk. Free all the program variants which match
1972 * the given context.
1973 */
1974 static void
1975 destroy_program_variants_cb(GLuint key, void *data, void *userData)
1976 {
1977 struct st_context *st = (struct st_context *) userData;
1978 struct gl_program *program = (struct gl_program *) data;
1979 destroy_program_variants(st, program);
1980 }
1981
1982
1983 /**
1984 * Walk over all shaders and programs to delete any variants which
1985 * belong to the given context.
1986 * This is called during context tear-down.
1987 */
1988 void
1989 st_destroy_program_variants(struct st_context *st)
1990 {
1991 /* If shaders can be shared with other contexts, the last context will
1992 * call DeleteProgram on all shaders, releasing everything.
1993 */
1994 if (st->has_shareable_shaders)
1995 return;
1996
1997 /* ARB vert/frag program */
1998 _mesa_HashWalk(st->ctx->Shared->Programs,
1999 destroy_program_variants_cb, st);
2000
2001 /* GLSL vert/frag/geom shaders */
2002 _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
2003 destroy_shader_program_variants_cb, st);
2004 }
2005
2006
2007 /**
2008  * Precompile the default (zero-key) variant of a program.
2009 */
2010 static void
2011 st_precompile_shader_variant(struct st_context *st,
2012 struct gl_program *prog)
2013 {
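   /* Every case below builds an all-zero key, i.e. the default variant.
    * key.st is left NULL when shaders can be shared between contexts, so
    * the precompiled variant is not tied to this context.
    */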
2014 switch (prog->Target) {
2015 case GL_VERTEX_PROGRAM_ARB: {
2016 struct st_program *p = (struct st_program *)prog;
2017 struct st_common_variant_key key;
2018
2019 memset(&key, 0, sizeof(key));
2020
2021 key.st = st->has_shareable_shaders ? NULL : st;
2022 st_get_vp_variant(st, p, &key);
2023 break;
2024 }
2025
2026 case GL_FRAGMENT_PROGRAM_ARB: {
2027 struct st_program *p = (struct st_program *)prog;
2028 struct st_fp_variant_key key;
2029
2030 memset(&key, 0, sizeof(key));
2031
2032 key.st = st->has_shareable_shaders ? NULL : st;
2033 st_get_fp_variant(st, p, &key);
2034 break;
2035 }
2036
2037 case GL_TESS_CONTROL_PROGRAM_NV:
2038 case GL_TESS_EVALUATION_PROGRAM_NV:
2039 case GL_GEOMETRY_PROGRAM_NV:
2040 case GL_COMPUTE_PROGRAM_NV: {
2041 struct st_program *p = st_program(prog);
2042 struct st_common_variant_key key;
2043
2044 memset(&key, 0, sizeof(key));
2045
2046 key.st = st->has_shareable_shaders ? NULL : st;
2047 st_get_common_variant(st, p, &key);
2048 break;
2049 }
2050
2051 default:
2052 assert(0);
2053 }
2054 }
2055
2056 void
2057 st_serialize_nir(struct st_program *stp)
2058 {
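   /* Serialize lazily and cache the blob; shader variants later
    * deserialize a private copy when they need to modify the NIR (see the
    * get_nir_shader() call in st_get_common_variant above).
    */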
2059 if (!stp->serialized_nir) {
2060 struct blob blob;
2061 size_t size;
2062
2063 blob_init(&blob);
2064 nir_serialize(&blob, stp->Base.nir, false);
2065 blob_finish_get_buffer(&blob, &stp->serialized_nir, &size);
2066 stp->serialized_nir_size = size;
2067 }
2068 }
2069
2070 void
2071 st_finalize_program(struct st_context *st, struct gl_program *prog)
2072 {
2073 if (st->current_program[prog->info.stage] == prog) {
2074 if (prog->info.stage == MESA_SHADER_VERTEX)
2075 st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
2076 else
2077 st->dirty |= ((struct st_program *)prog)->affected_states;
2078 }
2079
2080 if (prog->nir) {
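      /* Trim ralloc allocations that compilation attached to the shader
       * but that the final NIR no longer references.
       */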
2081 nir_sweep(prog->nir);
2082
2083       /* This is only needed for ARB_vp/fp programs, and for GLSL
2084        * programs when the disk cache is disabled.  If the disk cache is
2085        * enabled, GLSL programs are serialized in write_nir_to_cache.
2086        */
2087 st_serialize_nir(st_program(prog));
2088 }
2089
2090 /* Create Gallium shaders now instead of on demand. */
2091    if ((ST_DEBUG & DEBUG_PRECOMPILE) ||
2092        st->shader_has_one_variant[prog->info.stage])
2093 st_precompile_shader_variant(st, prog);
2094 }