src/mesa/state_tracker/st_glsl_to_nir.cpp

   1 /*
   2  * Copyright © 2015 Red Hat
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  */
  23
  24 #include "st_nir.h"
  25
  26 #include "pipe/p_defines.h"
  27 #include "pipe/p_screen.h"
  28 #include "pipe/p_context.h"
  29
  30 #include "program/program.h"
  31 #include "program/prog_statevars.h"
  32 #include "program/prog_parameter.h"
  33 #include "program/ir_to_mesa.h"
  34 #include "main/mtypes.h"
  35 #include "main/errors.h"
  36 #include "main/shaderapi.h"
  37 #include "main/uniforms.h"
  38
  39 #include "st_context.h"
  40 #include "st_glsl_types.h"
  41 #include "st_program.h"
  42
  43 #include "compiler/nir/nir.h"
  44 #include "compiler/glsl_types.h"
  45 #include "compiler/glsl/glsl_to_nir.h"
  46 #include "compiler/glsl/gl_nir.h"
  47 #include "compiler/glsl/ir.h"
  48 #include "compiler/glsl/ir_optimization.h"
  49 #include "compiler/glsl/string_to_uint_map.h"
  50
  51
  52 static int
  53 type_size(const struct glsl_type *type)
  54 {
  55    return type->count_attribute_slots(false);
  56 }
  57
  58 /* Depending on PIPE_CAP_TGSI_TEXCOORD (st->needs_texcoord_semantic) we
  59  * may need to fix up varying slots so the glsl->nir path is aligned
  60  * with the anything->tgsi->nir path.
  61  */
  62 static void
  63 st_nir_fixup_varying_slots(struct st_context *st, struct exec_list *var_list)
  64 {
  65    if (st->needs_texcoord_semantic)
  66       return;
  67
  68    nir_foreach_variable(var, var_list) {
  69       if (var->data.location >= VARYING_SLOT_VAR0) {
  70          var->data.location += 9;
  71       } else if ((var->data.location >= VARYING_SLOT_TEX0) &&
  72                (var->data.location <= VARYING_SLOT_TEX7)) {
  73          var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0;
  74       }
  75    }
  76 }
  77
  78 /* input location assignment for VS inputs must be handled specially, so
  79  * that it is aligned w/ st's vbo state.
  80  * (This isn't the case with, for ex, FS inputs, which only need to agree
  81  * on varying-slot w/ the VS outputs)
  82  */
  83 static void
  84 st_nir_assign_vs_in_locations(struct gl_program *prog, nir_shader *nir)
  85 {
  86    unsigned attr, num_inputs = 0;
  87    unsigned input_to_index[VERT_ATTRIB_MAX] = {0};
  88
  89    /* TODO de-duplicate w/ similar code in st_translate_vertex_program()? */
  90    for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
  91       if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
  92          input_to_index[attr] = num_inputs;
  93          num_inputs++;
  94          if ((prog->info.vs.double_inputs_read & BITFIELD64_BIT(attr)) != 0) {
  95             /* add placeholder for second part of a double attribute */
  96             num_inputs++;
  97          }
  98       } else {
  99          input_to_index[attr] = ~0;
 100       }
 101    }
 102
 103    /* bit of a hack, mirroring st_translate_vertex_program */
 104    input_to_index[VERT_ATTRIB_EDGEFLAG] = num_inputs;
 105
 106    nir->num_inputs = 0;
 107    nir_foreach_variable_safe(var, &nir->inputs) {
 108       attr = var->data.location;
 109       assert(attr < ARRAY_SIZE(input_to_index));
 110
 111       if (input_to_index[attr] != ~0u) {
 112          var->data.driver_location = input_to_index[attr];
 113          nir->num_inputs++;
 114       } else {
 115          /* Move unused input variables to the globals list (with no
 116           * initialization), to avoid confusing drivers looking through the
 117           * inputs array and expecting to find inputs with a driver_location
 118           * set.
 119           */
 120          exec_node_remove(&var->node);
 121          var->data.mode = nir_var_global;
 122          exec_list_push_tail(&nir->globals, &var->node);
 123       }
 124    }
 125 }
 126
 127 static void
 128 st_nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
 129                             gl_shader_stage stage)
 130 {
 131    unsigned location = 0;
 132    unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
 133    uint64_t processed_locs[2] = {0};
 134
 135    const int base = stage == MESA_SHADER_FRAGMENT ?
 136       (int) FRAG_RESULT_DATA0 : (int) VARYING_SLOT_VAR0;
 137
 138    int UNUSED last_loc = 0;
 139    nir_foreach_variable(var, var_list) {
 140
 141       const struct glsl_type *type = var->type;
 142       if (nir_is_per_vertex_io(var, stage)) {
 143          assert(glsl_type_is_array(type));
 144          type = glsl_get_array_element(type);
 145       }
 146
 147       unsigned var_size = type_size(type);
 148
 149       /* Builtins don't allow component packing so we only need to worry about
 150        * user defined varyings sharing the same location.
 151        */
 152       bool processed = false;
 153       if (var->data.location >= base) {
 154          unsigned glsl_location = var->data.location - base;
 155
 156          for (unsigned i = 0; i < var_size; i++) {
 157             if (processed_locs[var->data.index] &
 158                 ((uint64_t)1 << (glsl_location + i)))
 159                processed = true;
 160             else
 161                processed_locs[var->data.index] |=
 162                   ((uint64_t)1 << (glsl_location + i));
 163          }
 164       }
 165
 166       /* Because component packing allows varyings to share the same location
 167        * we may have already have processed this location.
 168        */
 169       if (processed) {
 170          unsigned driver_location = assigned_locations[var->data.location];
 171          var->data.driver_location = driver_location;
 172          *size += type_size(type);
 173
 174          /* An array may be packed such that is crosses multiple other arrays
 175           * or variables, we need to make sure we have allocated the elements
 176           * consecutively if the previously proccessed var was shorter than
 177           * the current array we are processing.
 178           *
 179           * NOTE: The code below assumes the var list is ordered in ascending
 180           * location order.
 181           */
 182          assert(last_loc <= var->data.location);
 183          last_loc = var->data.location;
 184          unsigned last_slot_location = driver_location + var_size;
 185          if (last_slot_location > location) {
 186             unsigned num_unallocated_slots = last_slot_location - location;
 187             unsigned first_unallocated_slot = var_size - num_unallocated_slots;
 188             for (unsigned i = first_unallocated_slot; i < num_unallocated_slots; i++) {
 189                assigned_locations[var->data.location + i] = location;
 190                location++;
 191             }
 192          }
 193          continue;
 194       }
 195
 196       for (unsigned i = 0; i < var_size; i++) {
 197          assigned_locations[var->data.location + i] = location + i;
 198       }
 199
 200       var->data.driver_location = location;
 201       location += var_size;
 202    }
 203
 204    *size += location;
 205 }
 206
 207 static int
 208 st_nir_lookup_parameter_index(const struct gl_program_parameter_list *params,
 209                               const char *name)
 210 {
 211    int loc = _mesa_lookup_parameter_index(params, name);
 212
 213    /* is there a better way to do this?  If we have something like:
 214     *
 215     *    struct S {
 216     *           float f;
 217     *           vec4 v;
 218     *    };
 219     *    uniform S color;
 220     *
 221     * Then what we get in prog->Parameters looks like:
 222     *
 223     *    0: Name=color.f, Type=6, DataType=1406, Size=1
 224     *    1: Name=color.v, Type=6, DataType=8b52, Size=4
 225     *
 226     * So the name doesn't match up and _mesa_lookup_parameter_index()
 227     * fails.  In this case just find the first matching "color.*"..
 228     *
 229     * Note for arrays you could end up w/ color[n].f, for example.
 230     *
 231     * glsl_to_tgsi works slightly differently in this regard.  It is
 232     * emitting something more low level, so it just translates the
 233     * params list 1:1 to CONST[] regs.  Going from GLSL IR to TGSI,
 234     * it just calculates the additional offset of struct field members
 235     * in glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) or
 236     * glsl_to_tgsi_visitor::visit(ir_dereference_array *ir).  It never
 237     * needs to work backwards to get base var loc from the param-list
 238     * which already has them separated out.
 239     */
 240    if (loc < 0) {
 241       int namelen = strlen(name);
 242       for (unsigned i = 0; i < params->NumParameters; i++) {
 243          struct gl_program_parameter *p = &params->Parameters[i];
 244          if ((strncmp(p->Name, name, namelen) == 0) &&
 245              ((p->Name[namelen] == '.') || (p->Name[namelen] == '['))) {
 246             loc = i;
 247             break;
 248          }
 249       }
 250    }
 251
 252    return loc;
 253 }
 254
 255 static void
 256 st_nir_assign_uniform_locations(struct gl_context *ctx,
 257                                 struct gl_program *prog,
 258                                 struct gl_shader_program *shader_program,
 259                                 struct exec_list *uniform_list, unsigned *size)
 260 {
 261    int max = 0;
 262    int shaderidx = 0;
 263    int imageidx = 0;
 264
 265    nir_foreach_variable(uniform, uniform_list) {
 266       int loc;
 267
 268       /*
 269        * UBO's have their own address spaces, so don't count them towards the
 270        * number of global uniforms
 271        */
 272       if ((uniform->data.mode == nir_var_uniform || uniform->data.mode == nir_var_shader_storage) &&
 273           uniform->interface_type != NULL)
 274          continue;
 275
 276       const struct glsl_type *type = glsl_without_array(uniform->type);
 277       if (!uniform->data.bindless && (type->is_sampler() || type->is_image())) {
 278          if (type->is_sampler()) {
 279             loc = shaderidx;
 280             shaderidx += type_size(uniform->type);
 281          } else {
 282             loc = imageidx;
 283             imageidx += type_size(uniform->type);
 284          }
 285       } else if (strncmp(uniform->name, "gl_", 3) == 0) {
 286          const gl_state_index16 *const stateTokens = uniform->state_slots[0].tokens;
 287          /* This state reference has already been setup by ir_to_mesa, but we'll
 288           * get the same index back here.
 289           */
 290
 291          unsigned comps;
 292          if (glsl_type_is_struct(type)) {
 293             comps = 4;
 294          } else {
 295             comps = glsl_get_vector_elements(type);
 296          }
 297
 298          if (ctx->Const.PackedDriverUniformStorage) {
 299             loc = _mesa_add_sized_state_reference(prog->Parameters,
 300                                                   stateTokens, comps, false);
 301             loc = prog->Parameters->ParameterValueOffset[loc];
 302          } else {
 303             loc = _mesa_add_state_reference(prog->Parameters, stateTokens);
 304          }
 305       } else {
 306          loc = st_nir_lookup_parameter_index(prog->Parameters, uniform->name);
 307
 308          if (ctx->Const.PackedDriverUniformStorage) {
 309             loc = prog->Parameters->ParameterValueOffset[loc];
 310          }
 311       }
 312
 313       uniform->data.driver_location = loc;
 314
 315       max = MAX2(max, loc + type_size(uniform->type));
 316    }
 317    *size = max;
 318 }
 319
 320 void
 321 st_nir_opts(nir_shader *nir, bool scalar)
 322 {
 323    bool progress;
 324    do {
 325       progress = false;
 326
 327       NIR_PASS_V(nir, nir_lower_vars_to_ssa);
 328
 329       if (scalar) {
 330          NIR_PASS_V(nir, nir_lower_alu_to_scalar);
 331          NIR_PASS_V(nir, nir_lower_phis_to_scalar);
 332       }
 333
 334       NIR_PASS_V(nir, nir_lower_alu);
 335       NIR_PASS_V(nir, nir_lower_pack);
 336       NIR_PASS(progress, nir, nir_copy_prop);
 337       NIR_PASS(progress, nir, nir_opt_remove_phis);
 338       NIR_PASS(progress, nir, nir_opt_dce);
 339       if (nir_opt_trivial_continues(nir)) {
 340          progress = true;
 341          NIR_PASS(progress, nir, nir_copy_prop);
 342          NIR_PASS(progress, nir, nir_opt_dce);
 343       }
 344       NIR_PASS(progress, nir, nir_opt_if);
 345       NIR_PASS(progress, nir, nir_opt_dead_cf);
 346       NIR_PASS(progress, nir, nir_opt_cse);
 347       NIR_PASS(progress, nir, nir_opt_peephole_select, 8);
 348
 349       NIR_PASS(progress, nir, nir_opt_algebraic);
 350       NIR_PASS(progress, nir, nir_opt_constant_folding);
 351
 352       NIR_PASS(progress, nir, nir_opt_undef);
 353       NIR_PASS(progress, nir, nir_opt_conditional_discard);
 354       if (nir->options->max_unroll_iterations) {
 355          NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
 356       }
 357    } while (progress);
 358 }
 359
 360 /* First third of converting glsl_to_nir.. this leaves things in a pre-
 361  * nir_lower_io state, so that shader variants can more easily insert/
 362  * replace variables, etc.
 363  */
 364 static nir_shader *
 365 st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
 366                struct gl_shader_program *shader_program,
 367                gl_shader_stage stage)
 368 {
 369    const nir_shader_compiler_options *options =
 370       st->ctx->Const.ShaderCompilerOptions[prog->info.stage].NirOptions;
 371    enum pipe_shader_type type = pipe_shader_type_from_mesa(stage);
 372    struct pipe_screen *screen = st->pipe->screen;
 373    bool is_scalar = screen->get_shader_param(screen, type, PIPE_SHADER_CAP_SCALAR_ISA);
 374    assert(options);
 375
 376    if (prog->nir)
 377       return prog->nir;
 378
 379    nir_shader *nir = glsl_to_nir(shader_program, stage, options);
 380
 381    /* Set the next shader stage hint for VS and TES. */
 382    if (!nir->info.separate_shader &&
 383        (nir->info.stage == MESA_SHADER_VERTEX ||
 384         nir->info.stage == MESA_SHADER_TESS_EVAL)) {
 385
 386       unsigned prev_stages = (1 << (prog->info.stage + 1)) - 1;
 387       unsigned stages_mask =
 388          ~prev_stages & shader_program->data->linked_stages;
 389
 390       nir->info.next_stage = stages_mask ?
 391          (gl_shader_stage) ffs(stages_mask) : MESA_SHADER_FRAGMENT;
 392    } else {
 393       nir->info.next_stage = MESA_SHADER_FRAGMENT;
 394    }
 395
 396    nir_variable_mode mask =
 397       (nir_variable_mode) (nir_var_shader_in | nir_var_shader_out);
 398    nir_remove_dead_variables(nir, mask);
 399
 400    if (options->lower_all_io_to_temps ||
 401        nir->info.stage == MESA_SHADER_VERTEX ||
 402        nir->info.stage == MESA_SHADER_GEOMETRY) {
 403       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
 404                  nir_shader_get_entrypoint(nir),
 405                  true, true);
 406    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 407       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
 408                  nir_shader_get_entrypoint(nir),
 409                  true, false);
 410    }
 411
 412    NIR_PASS_V(nir, nir_lower_global_vars_to_local);
 413    NIR_PASS_V(nir, nir_split_var_copies);
 414    NIR_PASS_V(nir, nir_lower_var_copies);
 415
 416    st_nir_opts(nir, is_scalar);
 417
 418    return nir;
 419 }
 420
 421 /* Second third of converting glsl_to_nir. This creates uniforms, gathers
 422  * info on varyings, etc after NIR link time opts have been applied.
 423  */
 424 static void
 425 st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog,
 426                          struct gl_shader_program *shader_program)
 427 {
 428    nir_shader *nir = prog->nir;
 429
 430    /* Make a pass over the IR to add state references for any built-in
 431     * uniforms that are used.  This has to be done now (during linking).
 432     * Code generation doesn't happen until the first time this shader is
 433     * used for rendering.  Waiting until then to generate the parameters is
 434     * too late.  At that point, the values for the built-in uniforms won't
 435     * get sent to the shader.
 436     */
 437    nir_foreach_variable(var, &nir->uniforms) {
 438       if (strncmp(var->name, "gl_", 3) == 0) {
 439          const nir_state_slot *const slots = var->state_slots;
 440          assert(var->state_slots != NULL);
 441
 442          const struct glsl_type *type = glsl_without_array(var->type);
 443          for (unsigned int i = 0; i < var->num_state_slots; i++) {
 444             unsigned comps;
 445             if (glsl_type_is_struct(type)) {
 446                /* Builtin struct require specical handling for now we just
 447                 * make all members vec4. See st_nir_lower_builtin.
 448                 */
 449                comps = 4;
 450             } else {
 451                comps = glsl_get_vector_elements(type);
 452             }
 453
 454             if (st->ctx->Const.PackedDriverUniformStorage) {
 455                _mesa_add_sized_state_reference(prog->Parameters,
 456                                                slots[i].tokens,
 457                                                comps, false);
 458             } else {
 459                _mesa_add_state_reference(prog->Parameters,
 460                                          slots[i].tokens);
 461             }
 462          }
 463       }
 464    }
 465
 466    /* Avoid reallocation of the program parameter list, because the uniform
 467     * storage is only associated with the original parameter list.
 468     * This should be enough for Bitmap and DrawPixels constants.
 469     */
 470    _mesa_reserve_parameter_storage(prog->Parameters, 8);
 471
 472    /* This has to be done last.  Any operation the can cause
 473     * prog->ParameterValues to get reallocated (e.g., anything that adds a
 474     * program constant) has to happen before creating this linkage.
 475     */
 476    _mesa_associate_uniform_storage(st->ctx, shader_program, prog, true);
 477
 478    st_set_prog_affected_state_flags(prog);
 479
 480    NIR_PASS_V(nir, st_nir_lower_builtin);
 481    NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
 482
 483    if (st->ctx->_Shader->Flags & GLSL_DUMP) {
 484       _mesa_log("\n");
 485       _mesa_log("NIR IR for linked %s program %d:\n",
 486              _mesa_shader_stage_to_string(prog->info.stage),
 487              shader_program->Name);
 488       nir_print_shader(nir, _mesa_get_log_file());
 489       _mesa_log("\n\n");
 490    }
 491 }
 492
 493 /* TODO any better helper somewhere to sort a list? */
 494
 495 static void
 496 insert_sorted(struct exec_list *var_list, nir_variable *new_var)
 497 {
 498    nir_foreach_variable(var, var_list) {
 499       if (var->data.location > new_var->data.location) {
 500          exec_node_insert_node_before(&var->node, &new_var->node);
 501          return;
 502       }
 503    }
 504    exec_list_push_tail(var_list, &new_var->node);
 505 }
 506
 507 static void
 508 sort_varyings(struct exec_list *var_list)
 509 {
 510    struct exec_list new_list;
 511    exec_list_make_empty(&new_list);
 512    nir_foreach_variable_safe(var, var_list) {
 513       exec_node_remove(&var->node);
 514       insert_sorted(&new_list, var);
 515    }
 516    exec_list_move_nodes_to(&new_list, var_list);
 517 }
 518
 519 static void
 520 set_st_program(struct gl_program *prog,
 521                struct gl_shader_program *shader_program,
 522                nir_shader *nir)
 523 {
 524    struct st_vertex_program *stvp;
 525    struct st_common_program *stp;
 526    struct st_fragment_program *stfp;
 527    struct st_compute_program *stcp;
 528
 529    switch (prog->info.stage) {
 530    case MESA_SHADER_VERTEX:
 531       stvp = (struct st_vertex_program *)prog;
 532       stvp->shader_program = shader_program;
 533       stvp->tgsi.type = PIPE_SHADER_IR_NIR;
 534       stvp->tgsi.ir.nir = nir;
 535       break;
 536    case MESA_SHADER_GEOMETRY:
 537    case MESA_SHADER_TESS_CTRL:
 538    case MESA_SHADER_TESS_EVAL:
 539       stp = (struct st_common_program *)prog;
 540       stp->shader_program = shader_program;
 541       stp->tgsi.type = PIPE_SHADER_IR_NIR;
 542       stp->tgsi.ir.nir = nir;
 543       break;
 544    case MESA_SHADER_FRAGMENT:
 545       stfp = (struct st_fragment_program *)prog;
 546       stfp->shader_program = shader_program;
 547       stfp->tgsi.type = PIPE_SHADER_IR_NIR;
 548       stfp->tgsi.ir.nir = nir;
 549       break;
 550    case MESA_SHADER_COMPUTE:
 551       stcp = (struct st_compute_program *)prog;
 552       stcp->shader_program = shader_program;
 553       stcp->tgsi.ir_type = PIPE_SHADER_IR_NIR;
 554       stcp->tgsi.prog = nir;
 555       break;
 556    default:
 557       unreachable("unknown shader stage");
 558    }
 559 }
 560
 561 static void
 562 st_nir_get_mesa_program(struct gl_context *ctx,
 563                         struct gl_shader_program *shader_program,
 564                         struct gl_linked_shader *shader)
 565 {
 566    struct st_context *st = st_context(ctx);
 567    struct pipe_screen *pscreen = ctx->st->pipe->screen;
 568    struct gl_program *prog;
 569
 570    validate_ir_tree(shader->ir);
 571
 572    prog = shader->Program;
 573
 574    prog->Parameters = _mesa_new_parameter_list();
 575
 576    _mesa_copy_linked_program_data(shader_program, shader);
 577    _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
 578                                                prog->Parameters);
 579
 580    /* Remove reads from output registers. */
 581    if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS))
 582       lower_output_reads(shader->Stage, shader->ir);
 583
 584    if (ctx->_Shader->Flags & GLSL_DUMP) {
 585       _mesa_log("\n");
 586       _mesa_log("GLSL IR for linked %s program %d:\n",
 587              _mesa_shader_stage_to_string(shader->Stage),
 588              shader_program->Name);
 589       _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
 590       _mesa_log("\n\n");
 591    }
 592
 593    prog->ExternalSamplersUsed = gl_external_samplers(prog);
 594    _mesa_update_shader_textures_used(shader_program, prog);
 595
 596    nir_shader *nir = st_glsl_to_nir(st, prog, shader_program, shader->Stage);
 597
 598    set_st_program(prog, shader_program, nir);
 599    prog->nir = nir;
 600 }
 601
 602 static void
 603 st_nir_link_shaders(nir_shader **producer, nir_shader **consumer, bool scalar)
 604 {
 605    nir_lower_io_arrays_to_elements(*producer, *consumer);
 606
 607    NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
 608    NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);
 609
 610    if (nir_remove_unused_varyings(*producer, *consumer)) {
 611       NIR_PASS_V(*producer, nir_lower_global_vars_to_local);
 612       NIR_PASS_V(*consumer, nir_lower_global_vars_to_local);
 613
 614       /* The backend might not be able to handle indirects on
 615        * temporaries so we need to lower indirects on any of the
 616        * varyings we have demoted here.
 617        *
 618        * TODO: radeonsi shouldn't need to do this, however LLVM isn't
 619        * currently smart enough to handle indirects without causing excess
 620        * spilling causing the gpu to hang.
 621        *
 622        * See the following thread for more details of the problem:
 623        * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
 624        */
 625       nir_variable_mode indirect_mask = nir_var_local;
 626
 627       NIR_PASS_V(*producer, nir_lower_indirect_derefs, indirect_mask);
 628       NIR_PASS_V(*consumer, nir_lower_indirect_derefs, indirect_mask);
 629
 630       st_nir_opts(*producer, scalar);
 631       st_nir_opts(*consumer, scalar);
 632    }
 633 }
 634
 635 extern "C" {
 636
 637 bool
 638 st_link_nir(struct gl_context *ctx,
 639             struct gl_shader_program *shader_program)
 640 {
 641    struct st_context *st = st_context(ctx);
 642    struct pipe_screen *screen = st->pipe->screen;
 643    bool is_scalar[MESA_SHADER_STAGES];
 644
 645    /* Determine scalar property of each shader stage */
 646    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
 647       struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
 648       enum pipe_shader_type type;
 649
 650       if (shader == NULL)
 651          continue;
 652
 653       type = pipe_shader_type_from_mesa(shader->Stage);
 654       is_scalar[i] = screen->get_shader_param(screen, type, PIPE_SHADER_CAP_SCALAR_ISA);
 655    }
 656
 657    /* Determine first and last stage. */
 658    unsigned first = MESA_SHADER_STAGES;
 659    unsigned last = 0;
 660    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
 661       if (!shader_program->_LinkedShaders[i])
 662          continue;
 663       if (first == MESA_SHADER_STAGES)
 664          first = i;
 665       last = i;
 666    }
 667
 668    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
 669       struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
 670       if (shader == NULL)
 671          continue;
 672
 673       st_nir_get_mesa_program(ctx, shader_program, shader);
 674
 675       nir_variable_mode mask = (nir_variable_mode) 0;
 676       if (i != first)
 677          mask = (nir_variable_mode)(mask | nir_var_shader_in);
 678
 679       if (i != last)
 680          mask = (nir_variable_mode)(mask | nir_var_shader_out);
 681
 682       nir_shader *nir = shader->Program->nir;
 683       NIR_PASS_V(nir, nir_lower_io_to_scalar_early, mask);
 684       st_nir_opts(nir, is_scalar[i]);
 685    }
 686
 687    /* Linking the stages in the opposite order (from fragment to vertex)
 688     * ensures that inter-shader outputs written to in an earlier stage
 689     * are eliminated if they are (transitively) not used in a later
 690     * stage.
 691     */
 692    int next = last;
 693    for (int i = next - 1; i >= 0; i--) {
 694       struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
 695       if (shader == NULL)
 696          continue;
 697
 698       st_nir_link_shaders(&shader->Program->nir,
 699                           &shader_program->_LinkedShaders[next]->Program->nir,
 700                           is_scalar[i]);
 701       next = i;
 702    }
 703
 704    int prev = -1;
 705    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
 706       struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
 707       if (shader == NULL)
 708          continue;
 709
 710       nir_shader *nir = shader->Program->nir;
 711
 712       /* fragment shaders may need : */
 713       if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 714          static const gl_state_index16 wposTransformState[STATE_LENGTH] = {
 715             STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
 716          };
 717          nir_lower_wpos_ytransform_options wpos_options = { { 0 } };
 718          struct pipe_screen *pscreen = st->pipe->screen;
 719
 720          memcpy(wpos_options.state_tokens, wposTransformState,
 721                 sizeof(wpos_options.state_tokens));
 722          wpos_options.fs_coord_origin_upper_left =
 723             pscreen->get_param(pscreen,
 724                                PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
 725          wpos_options.fs_coord_origin_lower_left =
 726             pscreen->get_param(pscreen,
 727                                PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
 728          wpos_options.fs_coord_pixel_center_integer =
 729             pscreen->get_param(pscreen,
 730                                PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
 731          wpos_options.fs_coord_pixel_center_half_integer =
 732             pscreen->get_param(pscreen,
 733                                PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
 734
 735          if (nir_lower_wpos_ytransform(nir, &wpos_options)) {
 736             nir_validate_shader(nir);
 737             _mesa_add_state_reference(shader->Program->Parameters,
 738                                       wposTransformState);
 739          }
 740       }
 741
 742       NIR_PASS_V(nir, nir_lower_system_values);
 743
 744       nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
 745       shader->Program->info = nir->info;
 746
 747       if (prev != -1) {
 748          struct gl_program *prev_shader =
 749             shader_program->_LinkedShaders[prev]->Program;
 750
 751          /* We can't use nir_compact_varyings with transform feedback, since
 752           * the pipe_stream_output->output_register field is based on the
 753           * pre-compacted driver_locations.
 754           */
 755          if (!prev_shader->sh.LinkedTransformFeedback)
 756             nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir,
 757                               nir, ctx->API != API_OPENGL_COMPAT);
 758       }
 759       prev = i;
 760    }
 761
 762    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
 763       struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
 764       if (shader == NULL)
 765          continue;
 766
 767       st_glsl_to_nir_post_opts(st, shader->Program, shader_program);
 768
 769       assert(shader->Program);
 770       if (!ctx->Driver.ProgramStringNotify(ctx,
 771                                            _mesa_shader_stage_to_program(i),
 772                                            shader->Program)) {
 773          _mesa_reference_program(ctx, &shader->Program, NULL);
 774          return false;
 775       }
 776
 777       nir_sweep(shader->Program->nir);
 778    }
 779
 780    return true;
 781 }
 782
 783 /* Last third of preparing nir from glsl, which happens after shader
 784  * variant lowering.
 785  */
 786 void
 787 st_finalize_nir(struct st_context *st, struct gl_program *prog,
 788                 struct gl_shader_program *shader_program, nir_shader *nir)
 789 {
 790    struct pipe_screen *screen = st->pipe->screen;
 791    const nir_shader_compiler_options *options =
 792       st->ctx->Const.ShaderCompilerOptions[prog->info.stage].NirOptions;
 793
 794    NIR_PASS_V(nir, nir_split_var_copies);
 795    NIR_PASS_V(nir, nir_lower_var_copies);
 796    if (options->lower_all_io_to_temps ||
 797        nir->info.stage == MESA_SHADER_VERTEX ||
 798        nir->info.stage == MESA_SHADER_GEOMETRY) {
 799       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
 800    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 801       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
 802    }
 803
 804    if (nir->info.stage == MESA_SHADER_VERTEX) {
 805       /* Needs special handling so drvloc matches the vbo state: */
 806       st_nir_assign_vs_in_locations(prog, nir);
 807       /* Re-lower global vars, to deal with any dead VS inputs. */
 808       NIR_PASS_V(nir, nir_lower_global_vars_to_local);
 809
 810       sort_varyings(&nir->outputs);
 811       st_nir_assign_var_locations(&nir->outputs,
 812                                   &nir->num_outputs,
 813                                   nir->info.stage);
 814       st_nir_fixup_varying_slots(st, &nir->outputs);
 815    } else if (nir->info.stage == MESA_SHADER_GEOMETRY ||
 816               nir->info.stage == MESA_SHADER_TESS_CTRL ||
 817               nir->info.stage == MESA_SHADER_TESS_EVAL) {
 818       sort_varyings(&nir->inputs);
 819       st_nir_assign_var_locations(&nir->inputs,
 820                                   &nir->num_inputs,
 821                                   nir->info.stage);
 822       st_nir_fixup_varying_slots(st, &nir->inputs);
 823
 824       sort_varyings(&nir->outputs);
 825       st_nir_assign_var_locations(&nir->outputs,
 826                                   &nir->num_outputs,
 827                                   nir->info.stage);
 828       st_nir_fixup_varying_slots(st, &nir->outputs);
 829    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 830       sort_varyings(&nir->inputs);
 831       st_nir_assign_var_locations(&nir->inputs,
 832                                   &nir->num_inputs,
 833                                   nir->info.stage);
 834       st_nir_fixup_varying_slots(st, &nir->inputs);
 835       st_nir_assign_var_locations(&nir->outputs,
 836                                   &nir->num_outputs,
 837                                   nir->info.stage);
 838    } else if (nir->info.stage == MESA_SHADER_COMPUTE) {
 839        /* TODO? */
 840    } else {
 841       unreachable("invalid shader type for tgsi bypass\n");
 842    }
 843
 844    NIR_PASS_V(nir, nir_lower_atomics_to_ssbo,
 845          st->ctx->Const.Program[nir->info.stage].MaxAtomicBuffers);
 846
 847    st_nir_assign_uniform_locations(st->ctx, prog, shader_program,
 848                                    &nir->uniforms, &nir->num_uniforms);
 849
 850    if (st->ctx->Const.PackedDriverUniformStorage) {
 851       NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_glsl_type_dword_size,
 852                  (nir_lower_io_options)0);
 853       NIR_PASS_V(nir, st_nir_lower_uniforms_to_ubo);
 854    }
 855
 856    if (screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF))
 857       NIR_PASS_V(nir, gl_nir_lower_samplers_as_deref, shader_program);
 858    else
 859       NIR_PASS_V(nir, gl_nir_lower_samplers, shader_program);
 860 }
 861
 862 } /* extern "C" */