/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 *   Brian Paul
 */


#include "main/errors.h"
#include "util/imports.h"
#include "main/hash.h"
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "program/programopt.h"

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_serialize.h"
#include "draw/draw_context.h"

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_emulate.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"

#include "st_debug.h"
#include "st_cb_bitmap.h"
#include "st_cb_drawpixels.h"
#include "st_context.h"
#include "st_tgsi_lower_depth_clamp.h"
#include "st_tgsi_lower_yuv.h"
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
#include "st_atifs_to_tgsi.h"
#include "st_nir.h"
#include "st_shader_cache.h"
#include "st_util.h"
#include "cso_cache/cso_context.h"


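/**
 * OR the given dirty-state bits into *states for each kind of resource
 * (constants, textures, images, UBOs, SSBOs, atomics) that the program
 * actually uses.
 */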
static void
set_affected_state_flags(uint64_t *states,
                         struct gl_program *prog,
                         uint64_t new_constants,
                         uint64_t new_sampler_views,
                         uint64_t new_samplers,
                         uint64_t new_images,
                         uint64_t new_ubos,
                         uint64_t new_ssbos,
                         uint64_t new_atomics)
{
   if (prog->Parameters->NumParameters)
      *states |= new_constants;

   if (prog->info.num_textures)
      *states |= new_sampler_views | new_samplers;

   if (prog->info.num_images)
      *states |= new_images;

   if (prog->info.num_ubos)
      *states |= new_ubos;

   if (prog->info.num_ssbos)
      *states |= new_ssbos;

   if (prog->info.num_abos)
      *states |= new_atomics;
}

/**
 * This determines which states will be updated when the shader is bound.
 */
void
st_set_prog_affected_state_flags(struct gl_program *prog)
{
   uint64_t *states;

   switch (prog->info.stage) {
   case MESA_SHADER_VERTEX:
      states = &((struct st_program*)prog)->affected_states;

      *states = ST_NEW_VS_STATE |
                ST_NEW_RASTERIZER |
                ST_NEW_VERTEX_ARRAYS;

      set_affected_state_flags(states, prog,
                               ST_NEW_VS_CONSTANTS,
                               ST_NEW_VS_SAMPLER_VIEWS,
                               ST_NEW_VS_SAMPLERS,
                               ST_NEW_VS_IMAGES,
                               ST_NEW_VS_UBOS,
                               ST_NEW_VS_SSBOS,
                               ST_NEW_VS_ATOMICS);
      break;

   case MESA_SHADER_TESS_CTRL:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_TCS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_TCS_CONSTANTS,
                               ST_NEW_TCS_SAMPLER_VIEWS,
                               ST_NEW_TCS_SAMPLERS,
                               ST_NEW_TCS_IMAGES,
                               ST_NEW_TCS_UBOS,
                               ST_NEW_TCS_SSBOS,
                               ST_NEW_TCS_ATOMICS);
      break;

   case MESA_SHADER_TESS_EVAL:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_TES_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_TES_CONSTANTS,
                               ST_NEW_TES_SAMPLER_VIEWS,
                               ST_NEW_TES_SAMPLERS,
                               ST_NEW_TES_IMAGES,
                               ST_NEW_TES_UBOS,
                               ST_NEW_TES_SSBOS,
                               ST_NEW_TES_ATOMICS);
      break;

   case MESA_SHADER_GEOMETRY:
      states = &(st_program(prog))->affected_states;

      *states = ST_NEW_GS_STATE |
                ST_NEW_RASTERIZER;

      set_affected_state_flags(states, prog,
                               ST_NEW_GS_CONSTANTS,
                               ST_NEW_GS_SAMPLER_VIEWS,
                               ST_NEW_GS_SAMPLERS,
                               ST_NEW_GS_IMAGES,
                               ST_NEW_GS_UBOS,
                               ST_NEW_GS_SSBOS,
                               ST_NEW_GS_ATOMICS);
      break;

   case MESA_SHADER_FRAGMENT:
      states = &((struct st_program*)prog)->affected_states;

      /* gl_FragCoord and glDrawPixels always use constants. */
      *states = ST_NEW_FS_STATE |
                ST_NEW_SAMPLE_SHADING |
                ST_NEW_FS_CONSTANTS;

      set_affected_state_flags(states, prog,
                               ST_NEW_FS_CONSTANTS,
                               ST_NEW_FS_SAMPLER_VIEWS,
                               ST_NEW_FS_SAMPLERS,
                               ST_NEW_FS_IMAGES,
                               ST_NEW_FS_UBOS,
                               ST_NEW_FS_SSBOS,
                               ST_NEW_FS_ATOMICS);
      break;

   case MESA_SHADER_COMPUTE:
      states = &((struct st_program*)prog)->affected_states;

      *states = ST_NEW_CS_STATE;

      set_affected_state_flags(states, prog,
                               ST_NEW_CS_CONSTANTS,
                               ST_NEW_CS_SAMPLER_VIEWS,
                               ST_NEW_CS_SAMPLERS,
                               ST_NEW_CS_IMAGES,
                               ST_NEW_CS_UBOS,
                               ST_NEW_CS_SSBOS,
                               ST_NEW_CS_ATOMICS);
      break;

   default:
      unreachable("unhandled shader stage");
   }
}


/**
 * Delete a shader variant.  Note the caller must unlink the variant from
 * the linked list.
 */
static void
delete_variant(struct st_context *st, struct st_variant *v, GLenum target)
{
   if (v->driver_shader) {
      if (target == GL_VERTEX_PROGRAM_ARB &&
          ((struct st_common_variant*)v)->key.is_draw_shader) {
         /* Draw shader. */
         draw_delete_vertex_shader(st->draw, v->driver_shader);
      } else if (st->has_shareable_shaders || v->st == st) {
         /* The shader's context matches the calling context, or we
          * don't care.
          */
         switch (target) {
         case GL_VERTEX_PROGRAM_ARB:
            st->pipe->delete_vs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_CONTROL_PROGRAM_NV:
            st->pipe->delete_tcs_state(st->pipe, v->driver_shader);
            break;
         case GL_TESS_EVALUATION_PROGRAM_NV:
            st->pipe->delete_tes_state(st->pipe, v->driver_shader);
            break;
         case GL_GEOMETRY_PROGRAM_NV:
            st->pipe->delete_gs_state(st->pipe, v->driver_shader);
            break;
         case GL_FRAGMENT_PROGRAM_ARB:
            st->pipe->delete_fs_state(st->pipe, v->driver_shader);
            break;
         case GL_COMPUTE_PROGRAM_NV:
            st->pipe->delete_compute_state(st->pipe, v->driver_shader);
            break;
         default:
            unreachable("bad shader type in delete_variant");
         }
      } else {
         /* We can't delete a shader with a context different from the one
          * that created it.  Add it to the creating context's zombie list.
          */
         enum pipe_shader_type type =
            pipe_shader_type_from_mesa(_mesa_program_enum_to_shader_stage(target));

         st_save_zombie_shader(v->st, type, v->driver_shader);
      }
   }

   free(v);
}

static void
st_unbind_program(struct st_context *st, struct st_program *p)
{
   /* Unbind the shader in cso_context and re-bind in st/mesa. */
   switch (p->Base.info.stage) {
   case MESA_SHADER_VERTEX:
      cso_set_vertex_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_VS_STATE;
      break;
   case MESA_SHADER_TESS_CTRL:
      cso_set_tessctrl_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_TCS_STATE;
      break;
   case MESA_SHADER_TESS_EVAL:
      cso_set_tesseval_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_TES_STATE;
      break;
   case MESA_SHADER_GEOMETRY:
      cso_set_geometry_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_GS_STATE;
      break;
   case MESA_SHADER_FRAGMENT:
      cso_set_fragment_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_FS_STATE;
      break;
   case MESA_SHADER_COMPUTE:
      cso_set_compute_shader_handle(st->cso_context, NULL);
      st->dirty |= ST_NEW_CS_STATE;
      break;
   default:
      unreachable("invalid shader type");
   }
}

/**
 * Free all basic program variants.
 */
void
st_release_variants(struct st_context *st, struct st_program *p)
{
   struct st_variant *v;

   /* If any variants exist, unbind the program first (st_unbind_program
    * also sets the dirty flags so the right shaders get re-bound later),
    * because we don't know which of its shaders are bound in the driver.
    */
   if (p->variants)
      st_unbind_program(st, p);

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      delete_variant(st, v, p->Base.Target);
      v = next;
   }

   p->variants = NULL;

   if (p->state.tokens) {
      ureg_free_tokens(p->state.tokens);
      p->state.tokens = NULL;
   }

   /* Note: Any setup of ->ir.nir that has had pipe->create_*_state called on
    * it has resulted in the driver taking ownership of the NIR.  Those
    * callers should be NULLing out the nir field in any pipe_shader_state
    * that might have this called in order to indicate that.
    *
    * GLSL IR and ARB programs will have set gl_program->nir to the same
    * shader as ir->ir.nir, so it will be freed by _mesa_delete_program().
    */
}

/**
 * Free all basic program variants and unref program.
 */
void
st_release_program(struct st_context *st, struct st_program **p)
{
   if (!*p)
      return;

   st_release_variants(st, *p);
   st_reference_prog(st, p, NULL);
}

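/**
 * Run the NIR lowering passes that must happen before shader variants
 * are created: access-flag optimization, copy splitting/lowering,
 * io-array-to-element lowering, and VS input location assignment.
 */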
void
st_finalize_nir_before_variants(struct nir_shader *nir)
{
   NIR_PASS_V(nir, nir_opt_access);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);
   if (nir->options->lower_all_io_to_temps ||
       nir->options->lower_all_io_to_elements ||
       nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_GEOMETRY) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
   }

   st_nir_assign_vs_in_locations(nir);
}

/**
 * Translate ARB (asm) program to NIR
 */
static nir_shader *
st_translate_prog_to_nir(struct st_context *st, struct gl_program *prog,
                         gl_shader_stage stage)
{
   struct pipe_screen *screen = st->pipe->screen;
   const struct gl_shader_compiler_options *options =
      &st->ctx->Const.ShaderCompilerOptions[stage];

   /* Translate to NIR */
   nir_shader *nir = prog_to_nir(prog, options->NirOptions);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
   nir_validate_shader(nir, "after st/ptn lower_regs_to_ssa");

   NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, prog, screen);
   NIR_PASS_V(nir, nir_lower_system_values);

   /* Optimise NIR */
   NIR_PASS_V(nir, nir_opt_constant_folding);
   st_nir_opts(nir);
   st_finalize_nir_before_variants(nir);

   if (st->allow_st_finalize_nir_twice)
      st_finalize_nir(st, prog, NULL, nir, true);

   nir_validate_shader(nir, "after st/glsl finalize_nir");

   return nir;
}

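/**
 * Build the vertex program's input/output mappings: count the inputs,
 * fill in the VERT_ATTRIB <-> input-slot tables (inserting placeholders
 * for the second half of dual-slot 64-bit attributes) and the
 * output -> slot table, reserving entries for the potentially unused
 * edge flag input/output.
 */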
void
st_prepare_vertex_program(struct st_program *stp)
{
   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

   stvp->num_inputs = 0;
   memset(stvp->input_to_index, ~0, sizeof(stvp->input_to_index));
   memset(stvp->result_to_output, ~0, sizeof(stvp->result_to_output));

   /* Determine number of inputs, the mappings between VERT_ATTRIB_x
    * and TGSI generic input indexes, plus input attrib semantic info.
    */
   for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
      if ((stp->Base.info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
         stvp->input_to_index[attr] = stvp->num_inputs;
         stvp->index_to_input[stvp->num_inputs] = attr;
         stvp->num_inputs++;

         if ((stp->Base.DualSlotInputs & BITFIELD64_BIT(attr)) != 0) {
            /* add placeholder for second part of a double attribute */
            stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
            stvp->num_inputs++;
         }
      }
   }
   /* pre-setup potentially unused edgeflag input */
   stvp->input_to_index[VERT_ATTRIB_EDGEFLAG] = stvp->num_inputs;
   stvp->index_to_input[stvp->num_inputs] = VERT_ATTRIB_EDGEFLAG;

   /* Compute mapping of vertex program outputs to slots. */
   unsigned num_outputs = 0;
   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr))
         stvp->result_to_output[attr] = num_outputs++;
   }
   /* pre-setup potentially unused edgeflag output */
   stvp->result_to_output[VARYING_SLOT_EDGE] = num_outputs;
}

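/**
 * Translate the program's gl_transform_feedback_info into the
 * pipe_stream_output_info layout that Gallium drivers expect.
 */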
void
st_translate_stream_output_info(struct gl_program *prog)
{
   struct gl_transform_feedback_info *info = prog->sh.LinkedTransformFeedback;
   if (!info)
      return;

   /* Determine the (default) output register mapping for each output. */
   unsigned num_outputs = 0;
   ubyte output_mapping[VARYING_SLOT_TESS_MAX];
   memset(output_mapping, 0, sizeof(output_mapping));

   for (unsigned attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr))
         output_mapping[attr] = num_outputs++;
   }

   /* Translate stream output info. */
   struct pipe_stream_output_info *so_info =
      &((struct st_program*)prog)->state.stream_output;

   for (unsigned i = 0; i < info->NumOutputs; i++) {
      so_info->output[i].register_index =
         output_mapping[info->Outputs[i].OutputRegister];
      so_info->output[i].start_component = info->Outputs[i].ComponentOffset;
      so_info->output[i].num_components = info->Outputs[i].NumComponents;
      so_info->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so_info->output[i].dst_offset = info->Outputs[i].DstOffset;
      so_info->output[i].stream = info->Outputs[i].StreamId;
   }

   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so_info->stride[i] = info->Buffers[i].Stride;
   }
   so_info->num_outputs = info->NumOutputs;
}

/**
 * Translate a vertex program.
 */
bool
st_translate_vertex_program(struct st_context *st,
                            struct st_program *stp)
{
   struct ureg_program *ureg;
   enum pipe_error error;
   unsigned num_outputs = 0;
   unsigned attr;
   ubyte output_semantic_name[VARYING_SLOT_MAX] = {0};
   ubyte output_semantic_index[VARYING_SLOT_MAX] = {0};

   if (stp->Base.arb.IsPositionInvariant)
      _mesa_insert_mvp_code(st->ctx, &stp->Base);

   /* ARB_vp: */
   if (!stp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stp->Base, PROGRAM_OUTPUT);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       */
      stp->affected_states = ST_NEW_VS_STATE |
                             ST_NEW_RASTERIZER |
                             ST_NEW_VERTEX_ARRAYS;

      if (stp->Base.Parameters->NumParameters)
         stp->affected_states |= ST_NEW_VS_CONSTANTS;

      /* Translate to NIR if preferred. */
      if (st->pipe->screen->get_shader_param(st->pipe->screen,
                                             PIPE_SHADER_VERTEX,
                                             PIPE_SHADER_CAP_PREFERRED_IR)) {
         assert(!stp->glsl_to_tgsi);

         if (stp->Base.nir)
            ralloc_free(stp->Base.nir);

         if (stp->serialized_nir) {
            free(stp->serialized_nir);
            stp->serialized_nir = NULL;
         }

         stp->state.type = PIPE_SHADER_IR_NIR;
         stp->Base.nir = st_translate_prog_to_nir(st, &stp->Base,
                                                  MESA_SHADER_VERTEX);

         /* We must update stp->Base.info after translation and before
          * st_prepare_vertex_program is called, because inputs_read
          * may become outdated after NIR optimization passes.
          *
          * For ffvp/ARB_vp inputs_read is populated based
          * on declared attributes without taking their usage into
          * consideration.  When creating shader variants we expect
          * that their inputs_read would match the base ones for
          * input mapping to work properly.
          */
         nir_shader_gather_info(stp->Base.nir,
                                nir_shader_get_entrypoint(stp->Base.nir));
         st_nir_assign_vs_in_locations(stp->Base.nir);
         stp->Base.info = stp->Base.nir->info;

         /* For st_draw_feedback, we need to generate TGSI too if draw doesn't
          * use LLVM.
          */
         if (draw_has_llvm()) {
            st_prepare_vertex_program(stp);
            return true;
         }
      }
   }

   st_prepare_vertex_program(stp);

   /* Get semantic names and indices. */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (stp->Base.info.outputs_written & BITFIELD64_BIT(attr)) {
         unsigned slot = num_outputs++;
         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }
   /* pre-setup potentially unused edgeflag output */
   output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
   output_semantic_index[num_outputs] = 0;

   ureg = ureg_create_with_screen(PIPE_SHADER_VERTEX, st->pipe->screen);
   if (ureg == NULL)
      return false;

   if (stp->Base.info.clip_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
                    stp->Base.info.clip_distance_array_size);
   if (stp->Base.info.cull_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
                    stp->Base.info.cull_distance_array_size);

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stp->Base);
      _mesa_print_program_parameters(st->ctx, &stp->Base);
      debug_printf("\n");
   }

   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;

   if (stp->glsl_to_tgsi) {
      error = st_translate_program(st->ctx,
                                   PIPE_SHADER_VERTEX,
                                   ureg,
                                   stp->glsl_to_tgsi,
                                   &stp->Base,
                                   /* inputs */
                                   stvp->num_inputs,
                                   stvp->input_to_index,
                                   NULL, /* inputSlotToAttr */
                                   NULL, /* input semantic name */
                                   NULL, /* input semantic index */
                                   NULL, /* interp mode */
                                   /* outputs */
                                   num_outputs,
                                   stvp->result_to_output,
                                   output_semantic_name,
                                   output_semantic_index);

      st_translate_stream_output_info(&stp->Base);

      free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   } else
      error = st_translate_mesa_program(st->ctx,
                                        PIPE_SHADER_VERTEX,
                                        ureg,
                                        &stp->Base,
                                        /* inputs */
                                        stvp->num_inputs,
                                        stvp->input_to_index,
                                        NULL, /* input semantic name */
                                        NULL, /* input semantic index */
                                        NULL,
                                        /* outputs */
                                        num_outputs,
                                        stvp->result_to_output,
                                        output_semantic_name,
                                        output_semantic_index);

   if (error) {
      debug_printf("%s: failed to translate Mesa program:\n", __func__);
      _mesa_print_program(&stp->Base);
      debug_assert(0);
      return false;
   }

   stp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   if (stp->glsl_to_tgsi) {
      stp->glsl_to_tgsi = NULL;
      st_store_ir_in_disk_cache(st, &stp->Base, false);
   }

   return stp->state.tokens != NULL;
}

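/**
 * Return a NIR shader that a variant may own.  The first call hands out
 * the original NIR and transfers ownership; subsequent calls rebuild the
 * shader by deserializing stp->serialized_nir.
 */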
static struct nir_shader *
get_nir_shader(struct st_context *st, struct st_program *stp)
{
   if (stp->Base.nir) {
      nir_shader *nir = stp->Base.nir;

      /* The first shader variant takes ownership of NIR, so that there is
       * no cloning.  Additional shader variants are always generated from
       * serialized NIR to save memory.
       */
      stp->Base.nir = NULL;
      assert(stp->serialized_nir && stp->serialized_nir_size);
      return nir;
   }

   struct blob_reader blob_reader;
   const struct nir_shader_compiler_options *options =
      st->ctx->Const.ShaderCompilerOptions[stp->Base.info.stage].NirOptions;

   blob_reader_init(&blob_reader, stp->serialized_nir, stp->serialized_nir_size);
   return nir_deserialize(NULL, options, &blob_reader);
}

static const gl_state_index16 depth_range_state[STATE_LENGTH] =
   { STATE_DEPTH_RANGE };

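/**
 * Create a new vertex program variant for the given key, applying the
 * key's lowerings (clamp color, passthrough edge flags, point size,
 * user clip planes, depth clamp) to a fresh copy of the shader IR.
 */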
static struct st_common_variant *
st_create_vp_variant(struct st_context *st,
                     struct st_program *stvp,
                     const struct st_common_variant_key *key)
{
   struct st_common_variant *vpv = CALLOC_STRUCT(st_common_variant);
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = pipe->screen;
   struct pipe_shader_state state = {0};

   static const gl_state_index16 point_size_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED, 0 };
   struct gl_program_parameter_list *params = stvp->Base.Parameters;

   vpv->key = *key;

   state.stream_output = stvp->state.stream_output;

   if (stvp->state.type == PIPE_SHADER_IR_NIR &&
       (!key->is_draw_shader || draw_has_llvm())) {
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = get_nir_shader(st, stvp);
      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }
      if (key->passthrough_edgeflags) {
         NIR_PASS_V(state.ir.nir, nir_lower_passthrough_edgeflags);
         finalize = true;
      }

      if (key->lower_point_size) {
         _mesa_add_state_reference(params, point_size_state);
         NIR_PASS_V(state.ir.nir, nir_lower_point_size_mov,
                    point_size_state);
         finalize = true;
      }

      if (key->lower_ucp) {
         bool can_compact = screen->get_param(screen,
                                              PIPE_CAP_NIR_COMPACT_ARRAYS);

         bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
         gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
         for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
            if (use_eye) {
               clipplane_state[i][0] = STATE_CLIPPLANE;
               clipplane_state[i][1] = i;
            } else {
               clipplane_state[i][0] = STATE_INTERNAL;
               clipplane_state[i][1] = STATE_CLIP_INTERNAL;
               clipplane_state[i][2] = i;
            }
            _mesa_add_state_reference(params, clipplane_state[i]);
         }

         NIR_PASS_V(state.ir.nir, nir_lower_clip_vs, key->lower_ucp,
                    true, can_compact, clipplane_state);
         NIR_PASS_V(state.ir.nir, nir_lower_io_to_temporaries,
                    nir_shader_get_entrypoint(state.ir.nir), true, false);
         NIR_PASS_V(state.ir.nir, nir_lower_global_vars_to_local);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         st_finalize_nir(st, &stvp->Base, stvp->shader_program, state.ir.nir,
                         true);

         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));
      }

      if (ST_DEBUG & DEBUG_PRINT_IR)
         nir_print_shader(state.ir.nir, stderr);

      if (key->is_draw_shader)
         vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
      else
         vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);

      return vpv;
   }

   state.type = PIPE_SHADER_IR_TGSI;
   state.tokens = tgsi_dup_tokens(stvp->state.tokens);

   /* Emulate features. */
   if (key->clamp_color || key->passthrough_edgeflags) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const =
         _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp(state.tokens, depth_range_const,
                                         key->clip_negative_one_to_one);
      if (tokens != state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   if (key->is_draw_shader)
      vpv->base.driver_shader = draw_create_vertex_shader(st->draw, &state);
   else
      vpv->base.driver_shader = pipe->create_vs_state(pipe, &state);

   if (state.tokens) {
      tgsi_free_tokens(state.tokens);
   }

   return vpv;
}


/**
 * Find/create a vertex program variant.
 */
struct st_common_variant *
st_get_vp_variant(struct st_context *st,
                  struct st_program *stp,
                  const struct st_common_variant_key *key)
{
   struct st_vertex_program *stvp = (struct st_vertex_program *)stp;
   struct st_common_variant *vpv;

   /* Search for existing variant */
   for (vpv = st_common_variant(stp->variants); vpv;
        vpv = st_common_variant(vpv->base.next)) {
      if (memcmp(&vpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!vpv) {
      /* create now */
      vpv = st_create_vp_variant(st, stp, key);
      if (vpv) {
         vpv->base.st = key->st;

         unsigned num_inputs = stvp->num_inputs + key->passthrough_edgeflags;
         for (unsigned index = 0; index < num_inputs; ++index) {
            unsigned attr = stvp->index_to_input[index];
            if (attr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
               continue;
            vpv->vert_attrib_mask |= 1u << attr;
         }

         /* insert into list */
         vpv->base.next = stp->variants;
         stp->variants = &vpv->base;
      }
   }

   return vpv;
}


/**
 * Translate a Mesa fragment shader into a TGSI shader.
 */
bool
st_translate_fragment_program(struct st_context *st,
                              struct st_program *stfp)
{
   /* Non-GLSL programs: */
   if (!stfp->glsl_to_tgsi) {
      _mesa_remove_output_reads(&stfp->Base, PROGRAM_OUTPUT);
      if (st->ctx->Const.GLSLFragCoordIsSysVal)
         _mesa_program_fragment_position_to_sysval(&stfp->Base);

      /* This determines which states will be updated when the assembly
       * shader is bound.
       *
       * fragment.position and glDrawPixels always use constants.
       */
      stfp->affected_states = ST_NEW_FS_STATE |
                              ST_NEW_SAMPLE_SHADING |
                              ST_NEW_FS_CONSTANTS;

      if (stfp->ati_fs) {
         /* Just set them for ATI_fs unconditionally. */
         stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                  ST_NEW_FS_SAMPLERS;
      } else {
         /* ARB_fp */
         if (stfp->Base.SamplersUsed)
            stfp->affected_states |= ST_NEW_FS_SAMPLER_VIEWS |
                                     ST_NEW_FS_SAMPLERS;
      }

      /* Translate to NIR. */
      if (!stfp->ati_fs &&
          st->pipe->screen->get_shader_param(st->pipe->screen,
                                             PIPE_SHADER_FRAGMENT,
                                             PIPE_SHADER_CAP_PREFERRED_IR)) {
         nir_shader *nir =
            st_translate_prog_to_nir(st, &stfp->Base, MESA_SHADER_FRAGMENT);

         if (stfp->Base.nir)
            ralloc_free(stfp->Base.nir);
         if (stfp->serialized_nir) {
            free(stfp->serialized_nir);
            stfp->serialized_nir = NULL;
         }
         stfp->state.type = PIPE_SHADER_IR_NIR;
         stfp->Base.nir = nir;
         return true;
      }
   }

   ubyte outputMapping[2 * FRAG_RESULT_MAX];
   ubyte inputMapping[VARYING_SLOT_MAX];
   ubyte inputSlotToAttr[VARYING_SLOT_MAX];
   ubyte interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
   GLuint attr;
   GLbitfield64 inputsRead;
   struct ureg_program *ureg;

   GLboolean write_all = GL_FALSE;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint fs_num_inputs = 0;

   ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint fs_num_outputs = 0;

   memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
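   /* Note: for GLSL programs (stfp->glsl_to_tgsi), interpMode is set to
    * TGSI_INTERPOLATE_COUNT below as a sentinel meaning "take the
    * interpolation mode from the shader's own declaration", which is
    * resolved later in st_translate_program.
    */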
   inputsRead = stfp->Base.info.inputs_read;
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((inputsRead & BITFIELD64_BIT(attr)) != 0) {
         const GLuint slot = fs_num_inputs++;

         inputMapping[attr] = slot;
         inputSlotToAttr[slot] = attr;

         switch (attr) {
         case VARYING_SLOT_POS:
            input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            break;
         case VARYING_SLOT_COL0:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 0;
            interpMode[slot] = stfp->glsl_to_tgsi ?
               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
            break;
         case VARYING_SLOT_COL1:
            input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
            input_semantic_index[slot] = 1;
            interpMode[slot] = stfp->glsl_to_tgsi ?
               TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_COLOR;
            break;
         case VARYING_SLOT_FOGC:
            input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_FACE:
            input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_PRIMITIVE_ID:
            input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_LAYER:
            input_semantic_name[slot] = TGSI_SEMANTIC_LAYER;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_VIEWPORT:
            input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
            break;
         case VARYING_SLOT_CLIP_DIST0:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 0;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CLIP_DIST1:
            input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST;
            input_semantic_index[slot] = 1;
            interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
            break;
         case VARYING_SLOT_CULL_DIST0:
         case VARYING_SLOT_CULL_DIST1:
            /* these should have been lowered by GLSL */
            assert(0);
            break;
         /* In most cases, there is nothing special about these
          * inputs, so adopt a convention to use the generic
          * semantic name and the mesa VARYING_SLOT_ number as the
          * index.
          *
          * All that is required is that the vertex shader labels
          * its own outputs similarly, and that the vertex shader
          * generates at least every output required by the
          * fragment shader plus fixed-function hardware (such as
          * BFC).
          *
          * However, some drivers may need us to identify the PNTC and TEXi
          * varyings if, for example, their capability to replace them with
          * sprite coordinates is limited.
          */
         case VARYING_SLOT_PNTC:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD;
               input_semantic_index[slot] = 0;
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
               break;
            }
            /* fall through */
         case VARYING_SLOT_TEX0:
         case VARYING_SLOT_TEX1:
         case VARYING_SLOT_TEX2:
         case VARYING_SLOT_TEX3:
         case VARYING_SLOT_TEX4:
         case VARYING_SLOT_TEX5:
         case VARYING_SLOT_TEX6:
         case VARYING_SLOT_TEX7:
            if (st->needs_texcoord_semantic) {
               input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD;
               input_semantic_index[slot] = attr - VARYING_SLOT_TEX0;
               interpMode[slot] = stfp->glsl_to_tgsi ?
                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
               break;
            }
            /* fall through */
         case VARYING_SLOT_VAR0:
         default:
            /* Semantic indices should be zero-based because drivers may choose
             * to assign a fixed slot determined by that index.
             * This is useful because ARB_separate_shader_objects uses location
             * qualifiers for linkage, and if the semantic index corresponds to
             * these locations, linkage passes in the driver become unnecessary.
             *
             * If needs_texcoord_semantic is true, no semantic indices will be
             * consumed for the TEXi varyings, and we can base the locations of
             * the user varyings on VAR0.  Otherwise, we use TEX0 as base index.
             */
            assert(attr >= VARYING_SLOT_VAR0 || attr == VARYING_SLOT_PNTC ||
                   (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7));
            input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
            input_semantic_index[slot] = st_get_generic_varying_index(st, attr);
            if (attr == VARYING_SLOT_PNTC)
               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
            else {
               interpMode[slot] = stfp->glsl_to_tgsi ?
                  TGSI_INTERPOLATE_COUNT : TGSI_INTERPOLATE_PERSPECTIVE;
            }
            break;
         }
      }
      else {
         inputMapping[attr] = -1;
      }
   }

   /*
    * Semantics and mapping for outputs
    */
   GLbitfield64 outputsWritten = stfp->Base.info.outputs_written;

   /* if z is written, emit that first */
   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
   }

   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
   }

   if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
      fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK;
      fs_output_semantic_index[fs_num_outputs] = 0;
      outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs;
      fs_num_outputs++;
      outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK);
   }

   /* handle remaining outputs (color) */
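   /* Entries with attr >= FRAG_RESULT_MAX come from SecondaryOutputsWritten
    * and are the secondary color outputs used for dual-source blending;
    * they get TGSI_SEMANTIC_COLOR with semantic index 1.
    */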
   for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
      const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
         stfp->Base.SecondaryOutputsWritten;
      const unsigned loc = attr % FRAG_RESULT_MAX;

      if (written & BITFIELD64_BIT(loc)) {
         switch (loc) {
         case FRAG_RESULT_DEPTH:
         case FRAG_RESULT_STENCIL:
         case FRAG_RESULT_SAMPLE_MASK:
            /* handled above */
            assert(0);
            break;
         case FRAG_RESULT_COLOR:
            write_all = GL_TRUE; /* fallthrough */
         default: {
            int index;
            assert(loc == FRAG_RESULT_COLOR ||
                   (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));

            index = (loc == FRAG_RESULT_COLOR) ? 0 : (loc - FRAG_RESULT_DATA0);

            if (attr >= FRAG_RESULT_MAX) {
               /* Secondary color for dual source blending. */
               assert(index == 0);
               index++;
            }

            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
            fs_output_semantic_index[fs_num_outputs] = index;
            outputMapping[attr] = fs_num_outputs;
            break;
         }
         }

         fs_num_outputs++;
      }
   }

   ureg = ureg_create_with_screen(PIPE_SHADER_FRAGMENT, st->pipe->screen);
   if (ureg == NULL)
      return false;

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stfp->Base);
      _mesa_print_program_parameters(st->ctx, &stfp->Base);
      debug_printf("\n");
   }
   if (write_all == GL_TRUE)
      ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);

   if (stfp->Base.info.fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
      switch (stfp->Base.info.fs.depth_layout) {
      case FRAG_DEPTH_LAYOUT_ANY:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_ANY);
         break;
      case FRAG_DEPTH_LAYOUT_GREATER:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_GREATER);
         break;
      case FRAG_DEPTH_LAYOUT_LESS:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_LESS);
         break;
      case FRAG_DEPTH_LAYOUT_UNCHANGED:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
         break;
      default:
         assert(0);
      }
   }

   if (stfp->glsl_to_tgsi) {
      st_translate_program(st->ctx,
                           PIPE_SHADER_FRAGMENT,
                           ureg,
                           stfp->glsl_to_tgsi,
                           &stfp->Base,
                           /* inputs */
                           fs_num_inputs,
                           inputMapping,
                           inputSlotToAttr,
                           input_semantic_name,
                           input_semantic_index,
                           interpMode,
                           /* outputs */
                           fs_num_outputs,
                           outputMapping,
                           fs_output_semantic_name,
                           fs_output_semantic_index);

      free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
   } else if (stfp->ati_fs)
      st_translate_atifs_program(ureg,
                                 stfp->ati_fs,
                                 &stfp->Base,
                                 /* inputs */
                                 fs_num_inputs,
                                 inputMapping,
                                 input_semantic_name,
                                 input_semantic_index,
                                 interpMode,
                                 /* outputs */
                                 fs_num_outputs,
                                 outputMapping,
                                 fs_output_semantic_name,
                                 fs_output_semantic_index);
   else
      st_translate_mesa_program(st->ctx,
                                PIPE_SHADER_FRAGMENT,
                                ureg,
                                &stfp->Base,
                                /* inputs */
                                fs_num_inputs,
                                inputMapping,
                                input_semantic_name,
                                input_semantic_index,
                                interpMode,
                                /* outputs */
                                fs_num_outputs,
                                outputMapping,
                                fs_output_semantic_name,
                                fs_output_semantic_index);

   stfp->state.tokens = ureg_get_tokens(ureg, NULL);
   ureg_destroy(ureg);

   if (stfp->glsl_to_tgsi) {
      stfp->glsl_to_tgsi = NULL;
      st_store_ir_in_disk_cache(st, &stfp->Base, false);
   }

   return stfp->state.tokens != NULL;
}

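/**
 * Create a new fragment program variant for the given key, applying the
 * key's lowerings (clamp color, flat shading, alpha test, two-sided
 * color, glBitmap/glDrawPixels, external YUV samplers, depth clamp) to a
 * fresh copy of the shader IR.
 */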
static struct st_fp_variant *
st_create_fp_variant(struct st_context *st,
                     struct st_program *stfp,
                     const struct st_fp_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
   struct pipe_shader_state state = {0};
   struct gl_program_parameter_list *params = stfp->Base.Parameters;
   static const gl_state_index16 texcoord_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
   static const gl_state_index16 scale_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_PT_SCALE };
   static const gl_state_index16 bias_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_PT_BIAS };
   static const gl_state_index16 alpha_ref_state[STATE_LENGTH] =
      { STATE_INTERNAL, STATE_ALPHA_REF };

   if (!variant)
      return NULL;

   if (stfp->state.type == PIPE_SHADER_IR_NIR) {
      bool finalize = false;

      state.type = PIPE_SHADER_IR_NIR;
      state.ir.nir = get_nir_shader(st, stfp);

      if (key->clamp_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
         finalize = true;
      }

      if (key->lower_flatshade) {
         NIR_PASS_V(state.ir.nir, nir_lower_flatshade);
         finalize = true;
      }

      if (key->lower_alpha_func != COMPARE_FUNC_NEVER) {
         _mesa_add_state_reference(params, alpha_ref_state);
         NIR_PASS_V(state.ir.nir, nir_lower_alpha_test, key->lower_alpha_func,
                    false, alpha_ref_state);
         finalize = true;
      }

      if (key->lower_two_sided_color) {
         NIR_PASS_V(state.ir.nir, nir_lower_two_sided_color);
         finalize = true;
      }

      if (key->persample_shading) {
         nir_shader *shader = state.ir.nir;
         nir_foreach_variable(var, &shader->inputs)
            var->data.sample = true;
         finalize = true;
      }

      assert(!(key->bitmap && key->drawpixels));

      /* glBitmap */
      if (key->bitmap) {
         nir_lower_bitmap_options options = {0};

         variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;
         options.sampler = variant->bitmap_sampler;
         options.swizzle_xxxx = st->bitmap.tex_format == PIPE_FORMAT_R8_UNORM;

         NIR_PASS_V(state.ir.nir, nir_lower_bitmap, &options);
         finalize = true;
      }

      /* glDrawPixels (color only) */
      if (key->drawpixels) {
         nir_lower_drawpixels_options options = {{0}};
         unsigned samplers_used = stfp->Base.SamplersUsed;

         /* Find the first unused slot. */
         variant->drawpix_sampler = ffs(~samplers_used) - 1;
         options.drawpix_sampler = variant->drawpix_sampler;
         samplers_used |= (1 << variant->drawpix_sampler);

         options.pixel_maps = key->pixelMaps;
         if (key->pixelMaps) {
            variant->pixelmap_sampler = ffs(~samplers_used) - 1;
            options.pixelmap_sampler = variant->pixelmap_sampler;
         }

         options.scale_and_bias = key->scaleAndBias;
         if (key->scaleAndBias) {
            _mesa_add_state_reference(params, scale_state);
            memcpy(options.scale_state_tokens, scale_state,
                   sizeof(options.scale_state_tokens));
            _mesa_add_state_reference(params, bias_state);
            memcpy(options.bias_state_tokens, bias_state,
                   sizeof(options.bias_state_tokens));
         }

         _mesa_add_state_reference(params, texcoord_state);
         memcpy(options.texcoord_state_tokens, texcoord_state,
                sizeof(options.texcoord_state_tokens));

         NIR_PASS_V(state.ir.nir, nir_lower_drawpixels, &options);
         finalize = true;
      }

      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv)) {

         st_nir_lower_samplers(pipe->screen, state.ir.nir,
                               stfp->shader_program, &stfp->Base);

         nir_lower_tex_options options = {0};
         options.lower_y_uv_external = key->external.lower_nv12;
         options.lower_y_u_v_external = key->external.lower_iyuv;
         options.lower_xy_uxvx_external = key->external.lower_xy_uxvx;
         options.lower_yx_xuxv_external = key->external.lower_yx_xuxv;
         options.lower_ayuv_external = key->external.lower_ayuv;
         options.lower_xyuv_external = key->external.lower_xyuv;
         NIR_PASS_V(state.ir.nir, nir_lower_tex, &options);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         st_finalize_nir(st, &stfp->Base, stfp->shader_program, state.ir.nir,
                         false);
      }

      /* This pass needs to happen *after* nir_lower_sampler */
      if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                   key->external.lower_xy_uxvx || key->external.lower_yx_xuxv ||
                   key->external.lower_ayuv || key->external.lower_xyuv)) {
         NIR_PASS_V(state.ir.nir, st_nir_lower_tex_src_plane,
                    ~stfp->Base.SamplersUsed,
                    key->external.lower_nv12 || key->external.lower_xy_uxvx ||
                       key->external.lower_yx_xuxv,
                    key->external.lower_iyuv);
         finalize = true;
      }

      if (finalize || !st->allow_st_finalize_nir_twice) {
         /* Some of the lowering above may have introduced new varyings */
         nir_shader_gather_info(state.ir.nir,
                                nir_shader_get_entrypoint(state.ir.nir));

         struct pipe_screen *screen = pipe->screen;
         if (screen->finalize_nir)
            screen->finalize_nir(screen, state.ir.nir, false);
      }

      if (ST_DEBUG & DEBUG_PRINT_IR)
         nir_print_shader(state.ir.nir, stderr);

      variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
      variant->key = *key;

      return variant;
   }

   state.tokens = stfp->state.tokens;

   assert(!(key->bitmap && key->drawpixels));

   /* Fix texture targets and add fog for ATI_fs */
   if (stfp->ati_fs) {
      const struct tgsi_token *tokens = st_fixup_atifs(state.tokens, key);

      if (tokens)
         state.tokens = tokens;
      else
         fprintf(stderr, "mesa: cannot post-process ATI_fs\n");
   }

   /* Emulate features. */
   if (key->clamp_color || key->persample_shading) {
      const struct tgsi_token *tokens;
      unsigned flags =
         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
         (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);

      tokens = tgsi_emulate(state.tokens, flags);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot emulate deprecated features\n");
   }

   /* glBitmap */
   if (key->bitmap) {
      const struct tgsi_token *tokens;

      variant->bitmap_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      tokens = st_get_bitmap_shader(state.tokens,
                                    st->internal_target,
                                    variant->bitmap_sampler,
                                    st->needs_texcoord_semantic,
                                    st->bitmap.tex_format ==
                                    PIPE_FORMAT_R8_UNORM);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glBitmap\n");
   }

   /* glDrawPixels (color only) */
   if (key->drawpixels) {
      const struct tgsi_token *tokens;
      unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;

      /* Find the first unused slot. */
      variant->drawpix_sampler = ffs(~stfp->Base.SamplersUsed) - 1;

      if (key->pixelMaps) {
         unsigned samplers_used = stfp->Base.SamplersUsed |
                                  (1 << variant->drawpix_sampler);

         variant->pixelmap_sampler = ffs(~samplers_used) - 1;
      }

      if (key->scaleAndBias) {
         scale_const = _mesa_add_state_reference(params, scale_state);
         bias_const = _mesa_add_state_reference(params, bias_state);
      }

      texcoord_const = _mesa_add_state_reference(params, texcoord_state);

      tokens = st_get_drawpix_shader(state.tokens,
                                     st->needs_texcoord_semantic,
                                     key->scaleAndBias, scale_const,
                                     bias_const, key->pixelMaps,
                                     variant->drawpix_sampler,
                                     variant->pixelmap_sampler,
                                     texcoord_const, st->internal_target);

      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else
         fprintf(stderr, "mesa: cannot create a shader for glDrawPixels\n");
   }

   if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv ||
                key->external.lower_xy_uxvx || key->external.lower_yx_xuxv)) {
      const struct tgsi_token *tokens;

      /* samplers inserted here would conflict, but this should be impossible: */
      assert(!(key->bitmap || key->drawpixels));

      tokens = st_tgsi_lower_yuv(state.tokens,
                                 ~stfp->Base.SamplersUsed,
                                 key->external.lower_nv12 ||
                                 key->external.lower_xy_uxvx ||
                                 key->external.lower_yx_xuxv,
                                 key->external.lower_iyuv);
      if (tokens) {
         if (state.tokens != stfp->state.tokens)
            tgsi_free_tokens(state.tokens);
         state.tokens = tokens;
      } else {
         fprintf(stderr, "mesa: cannot create a shader for samplerExternalOES\n");
      }
   }

   if (key->lower_depth_clamp) {
      unsigned depth_range_const = _mesa_add_state_reference(params, depth_range_state);

      const struct tgsi_token *tokens;
      tokens = st_tgsi_lower_depth_clamp_fs(state.tokens, depth_range_const);
      if (state.tokens != stfp->state.tokens)
         tgsi_free_tokens(state.tokens);
      state.tokens = tokens;
   }

   if (ST_DEBUG & DEBUG_PRINT_IR)
      tgsi_dump(state.tokens, 0);

   /* fill in variant */
   variant->base.driver_shader = pipe->create_fs_state(pipe, &state);
   variant->key = *key;

   if (state.tokens != stfp->state.tokens)
      tgsi_free_tokens(state.tokens);
   return variant;
}

/**
 * Find/create a fragment program variant.
 */
struct st_fp_variant *
st_get_fp_variant(struct st_context *st,
                  struct st_program *stfp,
                  const struct st_fp_variant_key *key)
{
   struct st_fp_variant *fpv;

   /* Search for existing variant */
   for (fpv = st_fp_variant(stfp->variants); fpv;
        fpv = st_fp_variant(fpv->base.next)) {
      if (memcmp(&fpv->key, key, sizeof(*key)) == 0) {
         break;
      }
   }

   if (!fpv) {
      /* create new */
      fpv = st_create_fp_variant(st, stfp, key);
      if (fpv) {
         fpv->base.st = key->st;

         if (key->bitmap || key->drawpixels) {
            /* Regular variants should always come before the bitmap and
             * drawpixels variants (unless there are no regular variants),
             * so that st_update_fp can take a fast path when
             * shader_has_one_variant is set.
             */
            if (!stfp->variants) {
               stfp->variants = &fpv->base;
            } else {
               /* insert into list after the first one */
               fpv->base.next = stfp->variants->next;
               stfp->variants->next = &fpv->base;
            }
         } else {
            /* insert into list */
            fpv->base.next = stfp->variants;
            stfp->variants = &fpv->base;
         }
      }
   }

   return fpv;
}

/**
 * Translate a program.  This is common code for geometry and tessellation
 * shaders.
 */
bool
st_translate_common_program(struct st_context *st,
                            struct st_program *stp)
{
   struct gl_program *prog = &stp->Base;
   enum pipe_shader_type stage =
      pipe_shader_type_from_mesa(stp->Base.info.stage);
   struct ureg_program *ureg = ureg_create_with_screen(stage, st->pipe->screen);

   if (ureg == NULL)
      return false;

   switch (stage) {
   case PIPE_SHADER_TESS_CTRL:
      ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
                    stp->Base.info.tess.tcs_vertices_out);
      break;

   case PIPE_SHADER_TESS_EVAL:
      if (stp->Base.info.tess.primitive_mode == GL_ISOLINES)
         ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
      else
         ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
                       stp->Base.info.tess.primitive_mode);
      STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
      STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
                    PIPE_TESS_SPACING_FRACTIONAL_ODD);
      STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
                    PIPE_TESS_SPACING_FRACTIONAL_EVEN);

      ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
                    (stp->Base.info.tess.spacing + 1) % 3);

      ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
                    !stp->Base.info.tess.ccw);
      ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
                    stp->Base.info.tess.point_mode);
      break;

   case PIPE_SHADER_GEOMETRY:
      ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
                    stp->Base.info.gs.input_primitive);
      ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
                    stp->Base.info.gs.output_primitive);
      ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
                    stp->Base.info.gs.vertices_out);
      ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
                    stp->Base.info.gs.invocations);
      break;

   default:
      break;
   }

   ubyte inputSlotToAttr[VARYING_SLOT_TESS_MAX];
   ubyte inputMapping[VARYING_SLOT_TESS_MAX];
   ubyte outputMapping[VARYING_SLOT_TESS_MAX];
   GLuint attr;

   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
   uint num_inputs = 0;

   ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
   ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
   uint num_outputs = 0;

   GLint i;

   memset(inputSlotToAttr, 0, sizeof(inputSlotToAttr));
   memset(inputMapping, 0, sizeof(inputMapping));
   memset(outputMapping, 0, sizeof(outputMapping));
   memset(&stp->state, 0, sizeof(stp->state));

   if (prog->info.clip_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
                    prog->info.clip_distance_array_size);
   if (prog->info.cull_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
                    prog->info.cull_distance_array_size);

   /*
    * Convert Mesa program inputs to TGSI input register semantics.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) == 0)
         continue;

      unsigned slot = num_inputs++;

      inputMapping[attr] = slot;
      inputSlotToAttr[slot] = attr;

      unsigned semantic_name, semantic_index;
      tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                   &semantic_name, &semantic_index);
      input_semantic_name[slot] = semantic_name;
      input_semantic_index[slot] = semantic_index;
   }

   /* Also add patch inputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_inputs_read & (1u << attr)) {
         GLuint slot = num_inputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         inputMapping[patch_attr] = slot;
         inputSlotToAttr[slot] = patch_attr;
         input_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         input_semantic_index[slot] = attr;
      }
   }

   /* initialize output semantics to defaults */
   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
      output_semantic_name[i] = TGSI_SEMANTIC_GENERIC;
      output_semantic_index[i] = 0;
   }

   /*
    * Determine number of outputs, the (default) output register
    * mapping and the semantic information for each output.
    */
   for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
      if (prog->info.outputs_written & BITFIELD64_BIT(attr)) {
         GLuint slot = num_outputs++;

         outputMapping[attr] = slot;

         unsigned semantic_name, semantic_index;
         tgsi_get_gl_varying_semantic(attr, st->needs_texcoord_semantic,
                                      &semantic_name, &semantic_index);
         output_semantic_name[slot] = semantic_name;
         output_semantic_index[slot] = semantic_index;
      }
   }

   /* Also add patch outputs. */
   for (attr = 0; attr < 32; attr++) {
      if (prog->info.patch_outputs_written & (1u << attr)) {
         GLuint slot = num_outputs++;
         GLuint patch_attr = VARYING_SLOT_PATCH0 + attr;

         outputMapping[patch_attr] = slot;
         output_semantic_name[slot] = TGSI_SEMANTIC_PATCH;
         output_semantic_index[slot] = attr;
      }
   }

   st_translate_program(st->ctx,
                        stage,
                        ureg,
                        stp->glsl_to_tgsi,
                        prog,
                        /* inputs */
                        num_inputs,
                        inputMapping,
                        inputSlotToAttr,
                        input_semantic_name,
                        input_semantic_index,
                        NULL,
                        /* outputs */
                        num_outputs,
                        outputMapping,
                        output_semantic_name,
                        output_semantic_index);

   stp->state.tokens = ureg_get_tokens(ureg, NULL);

   ureg_destroy(ureg);

   st_translate_stream_output_info(prog);

   st_store_ir_in_disk_cache(st, prog, false);

   if (ST_DEBUG & DEBUG_PRINT_IR && ST_DEBUG & DEBUG_MESA)
      _mesa_print_program(prog);

   free_glsl_to_tgsi_visitor(stp->glsl_to_tgsi);
   stp->glsl_to_tgsi = NULL;
   return true;
}


/**
 * Get/create a basic program variant.
 */
struct st_variant *
st_get_common_variant(struct st_context *st,
                      struct st_program *prog,
                      const struct st_common_variant_key *key)
{
   struct pipe_context *pipe = st->pipe;
   struct st_variant *v;
   struct pipe_shader_state state = {0};

   /* Search for existing variant */
   for (v = prog->variants; v; v = v->next) {
      if (memcmp(&st_common_variant(v)->key, key, sizeof(*key)) == 0)
         break;
   }

   if (!v) {
      /* create new */
      v = (struct st_variant*)CALLOC_STRUCT(st_common_variant);
      if (v) {
         if (prog->state.type == PIPE_SHADER_IR_NIR) {
            bool finalize = false;

            state.type = PIPE_SHADER_IR_NIR;
            state.ir.nir = get_nir_shader(st, prog);

            if (key->clamp_color) {
               NIR_PASS_V(state.ir.nir, nir_lower_clamp_color_outputs);
               finalize = true;
            }

            state.stream_output = prog->state.stream_output;

            if (finalize || !st->allow_st_finalize_nir_twice) {
               st_finalize_nir(st, &prog->Base, prog->shader_program,
                               state.ir.nir, true);
            }

            if (ST_DEBUG & DEBUG_PRINT_IR)
               nir_print_shader(state.ir.nir, stderr);
         } else {
            if (key->lower_depth_clamp) {
               struct gl_program_parameter_list *params = prog->Base.Parameters;

               unsigned depth_range_const =
                  _mesa_add_state_reference(params, depth_range_state);

               const struct tgsi_token *tokens;
               tokens =
                  st_tgsi_lower_depth_clamp(prog->state.tokens,
                                            depth_range_const,
                                            key->clip_negative_one_to_one);

               if (tokens != prog->state.tokens)
                  tgsi_free_tokens(prog->state.tokens);

               prog->state.tokens = tokens;
            }
            state = prog->state;

            if (ST_DEBUG & DEBUG_PRINT_IR)
               tgsi_dump(state.tokens, 0);
         }
         /* fill in new variant */
         switch (prog->Base.info.stage) {
         case MESA_SHADER_TESS_CTRL:
            v->driver_shader = pipe->create_tcs_state(pipe, &state);
            break;
         case MESA_SHADER_TESS_EVAL:
            v->driver_shader = pipe->create_tes_state(pipe, &state);
            break;
         case MESA_SHADER_GEOMETRY:
            v->driver_shader = pipe->create_gs_state(pipe, &state);
            break;
         case MESA_SHADER_COMPUTE: {
            struct pipe_compute_state cs = {0};
            cs.ir_type = state.type;
            cs.req_local_mem = prog->Base.info.cs.shared_size;

            if (state.type == PIPE_SHADER_IR_NIR)
               cs.prog = state.ir.nir;
            else
               cs.prog = state.tokens;

            v->driver_shader = pipe->create_compute_state(pipe, &cs);
            break;
         }
         default:
            assert(!"unhandled shader type");
            free(v);
            return NULL;
         }

         st_common_variant(v)->key = *key;
         v->st = key->st;

         /* insert into list */
         v->next = prog->variants;
         prog->variants = v;
      }
   }

   return v;
}


/**
 * Vert/Geom/Frag programs have per-context variants.  Free all the
 * variants attached to the given program which match the given context.
 */
static void
destroy_program_variants(struct st_context *st, struct gl_program *target)
{
   if (!target || target == &_mesa_DummyProgram)
      return;

   struct st_program *p = st_program(target);
   struct st_variant *v, **prevPtr = &p->variants;
   bool unbound = false;

   for (v = p->variants; v; ) {
      struct st_variant *next = v->next;
      if (v->st == st) {
         if (!unbound) {
            st_unbind_program(st, p);
            unbound = true;
         }

         /* unlink from list */
         *prevPtr = next;
         /* destroy this variant */
         delete_variant(st, v, target->Target);
      }
      else {
         prevPtr = &v->next;
      }
      v = next;
   }
}


/**
 * Callback for _mesa_HashWalk.  Free all the shader's program variants
 * which match the given context.
 */
static void
destroy_shader_program_variants_cb(GLuint key, void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_shader *shader = (struct gl_shader *) data;

   switch (shader->Type) {
   case GL_SHADER_PROGRAM_MESA:
      {
         struct gl_shader_program *shProg = (struct gl_shader_program *) data;
         GLuint i;

         for (i = 0; i < ARRAY_SIZE(shProg->_LinkedShaders); i++) {
            if (shProg->_LinkedShaders[i])
               destroy_program_variants(st, shProg->_LinkedShaders[i]->Program);
         }
      }
      break;
   case GL_VERTEX_SHADER:
   case GL_FRAGMENT_SHADER:
   case GL_GEOMETRY_SHADER:
   case GL_TESS_CONTROL_SHADER:
   case GL_TESS_EVALUATION_SHADER:
   case GL_COMPUTE_SHADER:
      break;
   default:
      assert(0);
   }
}


/**
 * Callback for _mesa_HashWalk.  Free all the program variants which match
 * the given context.
 */
static void
destroy_program_variants_cb(GLuint key, void *data, void *userData)
{
   struct st_context *st = (struct st_context *) userData;
   struct gl_program *program = (struct gl_program *) data;
   destroy_program_variants(st, program);
}


/**
 * Walk over all shaders and programs to delete any variants which
 * belong to the given context.
 * This is called during context tear-down.
 */
void
st_destroy_program_variants(struct st_context *st)
{
   /* If shaders can be shared with other contexts, the last context will
    * call DeleteProgram on all shaders, releasing everything.
    */
   if (st->has_shareable_shaders)
      return;

   /* ARB vert/frag program */
   _mesa_HashWalk(st->ctx->Shared->Programs,
                  destroy_program_variants_cb, st);

   /* GLSL vert/frag/geom shaders */
   _mesa_HashWalk(st->ctx->Shared->ShaderObjects,
                  destroy_shader_program_variants_cb, st);
}


/**
 * Compile one shader variant.
 */
static void
st_precompile_shader_variant(struct st_context *st,
                             struct gl_program *prog)
{
   switch (prog->Target) {
   case GL_VERTEX_PROGRAM_ARB: {
      struct st_program *p = (struct st_program *)prog;
      struct st_common_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_vp_variant(st, p, &key);
      break;
   }

   case GL_FRAGMENT_PROGRAM_ARB: {
      struct st_program *p = (struct st_program *)prog;
      struct st_fp_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_fp_variant(st, p, &key);
      break;
   }

   case GL_TESS_CONTROL_PROGRAM_NV:
   case GL_TESS_EVALUATION_PROGRAM_NV:
   case GL_GEOMETRY_PROGRAM_NV:
   case GL_COMPUTE_PROGRAM_NV: {
      struct st_program *p = st_program(prog);
      struct st_common_variant_key key;

      memset(&key, 0, sizeof(key));

      key.st = st->has_shareable_shaders ? NULL : st;
      st_get_common_variant(st, p, &key);
      break;
   }

   default:
      assert(0);
   }
}

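/**
 * Serialize the program's NIR into stp->serialized_nir (if not already
 * done), so that later variants can be deserialized from it instead of
 * cloning the original shader (see get_nir_shader).
 */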
void
st_serialize_nir(struct st_program *stp)
{
   if (!stp->serialized_nir) {
      struct blob blob;
      size_t size;

      blob_init(&blob);
      nir_serialize(&blob, stp->Base.nir, false);
      blob_finish_get_buffer(&blob, &stp->serialized_nir, &size);
      stp->serialized_nir_size = size;
   }
}

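/**
 * Finalize a program once its IR is in place: flag the affected state if
 * the program is currently bound, sweep and serialize its NIR, and
 * precompile a variant when debugging or when only one variant is
 * expected.
 */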
void
st_finalize_program(struct st_context *st, struct gl_program *prog)
{
   if (st->current_program[prog->info.stage] == prog) {
      if (prog->info.stage == MESA_SHADER_VERTEX)
         st->dirty |= ST_NEW_VERTEX_PROGRAM(st, (struct st_program *)prog);
      else
         st->dirty |= ((struct st_program *)prog)->affected_states;
   }

   if (prog->nir) {
      nir_sweep(prog->nir);

      /* This is only needed for ARB_vp/fp programs and when the disk cache
       * is disabled.  If the disk cache is enabled, GLSL programs are
       * serialized in write_nir_to_cache.
       */
      st_serialize_nir(st_program(prog));
   }

   /* Create Gallium shaders now instead of on demand. */
   if (ST_DEBUG & DEBUG_PRECOMPILE ||
       st->shader_has_one_variant[prog->info.stage])
      st_precompile_shader_variant(st, prog);
}