anv,i965,radv,st,ir3: Call nir_lower_deref_instrs
[mesa.git] / src/mesa/state_tracker/st_glsl_to_nir.cpp
1 /*
2 * Copyright © 2015 Red Hat
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "st_nir.h"
25
26 #include "pipe/p_defines.h"
27 #include "pipe/p_screen.h"
28 #include "pipe/p_context.h"
29
30 #include "program/program.h"
31 #include "program/prog_statevars.h"
32 #include "program/prog_parameter.h"
33 #include "program/ir_to_mesa.h"
34 #include "main/mtypes.h"
35 #include "main/errors.h"
36 #include "main/shaderapi.h"
37 #include "main/uniforms.h"
38
39 #include "st_context.h"
40 #include "st_glsl_types.h"
41 #include "st_program.h"
42
43 #include "compiler/nir/nir.h"
44 #include "compiler/glsl_types.h"
45 #include "compiler/glsl/glsl_to_nir.h"
46 #include "compiler/glsl/gl_nir.h"
47 #include "compiler/glsl/ir.h"
48 #include "compiler/glsl/string_to_uint_map.h"
49
50
51 static int
52 type_size(const struct glsl_type *type)
53 {
54 return type->count_attribute_slots(false);
55 }
56
57 /* Depending on PIPE_CAP_TGSI_TEXCOORD (st->needs_texcoord_semantic) we
58 * may need to fix up varying slots so the glsl->nir path is aligned
59 * with the anything->tgsi->nir path.
60 */
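/* For example (illustrative only): with needs_texcoord_semantic == false,
 * a varying at VARYING_SLOT_TEX2 is remapped to VARYING_SLOT_VAR2, while a
 * varying already at VARYING_SLOT_VAR0 is pushed up to VARYING_SLOT_VAR9.
 * The +9 presumably leaves room for the eight remapped TEXn slots plus one
 * more generic slot used by the tgsi path.
 */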
61 static void
62 st_nir_fixup_varying_slots(struct st_context *st, struct exec_list *var_list)
63 {
64 if (st->needs_texcoord_semantic)
65 return;
66
67 nir_foreach_variable(var, var_list) {
68 if (var->data.location >= VARYING_SLOT_VAR0) {
69 var->data.location += 9;
70 } else if ((var->data.location >= VARYING_SLOT_TEX0) &&
71 (var->data.location <= VARYING_SLOT_TEX7)) {
72 var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0;
73 }
74 }
75 }
76
77 /* Input locations for VS inputs must be assigned specially, so that
78 * they are aligned w/ st's vbo state.
79 * (This isn't the case with, for example, FS inputs, which only need to
80 * agree on varying slots w/ the VS outputs.)
81 */
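/* For example (illustrative only): if inputs_read contains VERT_ATTRIB_POS
 * and VERT_ATTRIB_GENERIC0, they get driver locations 0 and 1, the edge
 * flag placeholder becomes 2, and any attribute also set in
 * vs.double_inputs_read reserves an extra index for its second half.
 */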
82 static void
83 st_nir_assign_vs_in_locations(struct gl_program *prog, nir_shader *nir)
84 {
85 unsigned attr, num_inputs = 0;
86 unsigned input_to_index[VERT_ATTRIB_MAX] = {0};
87
88 /* TODO de-duplicate w/ similar code in st_translate_vertex_program()? */
89 for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
90 if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
91 input_to_index[attr] = num_inputs;
92 num_inputs++;
93 if ((prog->info.vs.double_inputs_read & BITFIELD64_BIT(attr)) != 0) {
94 /* add placeholder for second part of a double attribute */
95 num_inputs++;
96 }
97 } else {
98 input_to_index[attr] = ~0;
99 }
100 }
101
102 /* bit of a hack, mirroring st_translate_vertex_program */
103 input_to_index[VERT_ATTRIB_EDGEFLAG] = num_inputs;
104
105 nir->num_inputs = 0;
106 nir_foreach_variable_safe(var, &nir->inputs) {
107 attr = var->data.location;
108 assert(attr < ARRAY_SIZE(input_to_index));
109
110 if (input_to_index[attr] != ~0u) {
111 var->data.driver_location = input_to_index[attr];
112 nir->num_inputs++;
113 } else {
114 /* Move unused input variables to the globals list (with no
115 * initialization), to avoid confusing drivers looking through the
116 * inputs array and expecting to find inputs with a driver_location
117 * set.
118 */
119 exec_node_remove(&var->node);
120 var->data.mode = nir_var_global;
121 exec_list_push_tail(&nir->globals, &var->node);
122 }
123 }
124 }
125
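/* Worked example (illustrative only) of the packing handling below: given,
 * in ascending location order, a vec3 at VAR1, a float[3] packed into the
 * free components of VAR1..VAR3, and a vec3 at VAR2: the vec3 at VAR1 gets
 * driver location 0; the float[3] sees VAR1 already processed, reuses
 * driver location 0 and allocates locations 1 and 2 for VAR2/VAR3; the
 * vec3 at VAR2 then simply reuses driver location 1.
 */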
126 static void
127 st_nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
128 gl_shader_stage stage)
129 {
130 unsigned location = 0;
131 unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
132 uint64_t processed_locs[2] = {0};
133
134 const int base = stage == MESA_SHADER_FRAGMENT ?
135 (int) FRAG_RESULT_DATA0 : (int) VARYING_SLOT_VAR0;
136
137 int UNUSED last_loc = 0;
138 nir_foreach_variable(var, var_list) {
139
140 const struct glsl_type *type = var->type;
141 if (nir_is_per_vertex_io(var, stage)) {
142 assert(glsl_type_is_array(type));
143 type = glsl_get_array_element(type);
144 }
145
146 unsigned var_size = type_size(type);
147
148 /* Builtins don't allow component packing so we only need to worry about
149 * user defined varyings sharing the same location.
150 */
151 bool processed = false;
152 if (var->data.location >= base) {
153 unsigned glsl_location = var->data.location - base;
154
155 for (unsigned i = 0; i < var_size; i++) {
156 if (processed_locs[var->data.index] &
157 ((uint64_t)1 << (glsl_location + i)))
158 processed = true;
159 else
160 processed_locs[var->data.index] |=
161 ((uint64_t)1 << (glsl_location + i));
162 }
163 }
164
165 /* Because component packing allows varyings to share the same location
166 * we may already have processed this location.
167 */
168 if (processed) {
169 unsigned driver_location = assigned_locations[var->data.location];
170 var->data.driver_location = driver_location;
171 *size += type_size(type);
172
173 /* An array may be packed such that it crosses multiple other arrays
174 * or variables, so we need to make sure we have allocated the elements
175 * consecutively if the previously processed var was shorter than
176 * the current array we are processing.
177 *
178 * NOTE: The code below assumes the var list is ordered in ascending
179 * location order.
180 */
181 assert(last_loc <= var->data.location);
182 last_loc = var->data.location;
183 unsigned last_slot_location = driver_location + var_size;
184 if (last_slot_location > location) {
185 unsigned num_unallocated_slots = last_slot_location - location;
186 unsigned first_unallocated_slot = var_size - num_unallocated_slots;
187 for (unsigned i = first_unallocated_slot; i < var_size; i++) {
188 assigned_locations[var->data.location + i] = location;
189 location++;
190 }
191 }
192 continue;
193 }
194
195 for (unsigned i = 0; i < var_size; i++) {
196 assigned_locations[var->data.location + i] = location + i;
197 }
198
199 var->data.driver_location = location;
200 location += var_size;
201 }
202
203 *size += location;
204 }
205
206 static int
207 st_nir_lookup_parameter_index(const struct gl_program_parameter_list *params,
208 const char *name)
209 {
210 int loc = _mesa_lookup_parameter_index(params, name);
211
212 /* is there a better way to do this? If we have something like:
213 *
214 * struct S {
215 * float f;
216 * vec4 v;
217 * };
218 * uniform S color;
219 *
220 * Then what we get in prog->Parameters looks like:
221 *
222 * 0: Name=color.f, Type=6, DataType=1406, Size=1
223 * 1: Name=color.v, Type=6, DataType=8b52, Size=4
224 *
225 * So the name doesn't match up and _mesa_lookup_parameter_index()
226 * fails. In this case just find the first matching "color.*"..
227 *
228 * Note for arrays you could end up w/ color[n].f, for example.
229 *
230 * glsl_to_tgsi works slightly differently in this regard. It is
231 * emitting something more low level, so it just translates the
232 * params list 1:1 to CONST[] regs. Going from GLSL IR to TGSI,
233 * it just calculates the additional offset of struct field members
234 * in glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) or
235 * glsl_to_tgsi_visitor::visit(ir_dereference_array *ir). It never
236 * needs to work backwards to get base var loc from the param-list
237 * which already has them separated out.
238 */
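/* For example (illustrative only): with the parameter list shown above,
 * st_nir_lookup_parameter_index(params, "color") returns 0, the index of
 * "color.f", because the direct lookup fails and the prefix match on
 * "color." succeeds there first.
 */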
239 if (loc < 0) {
240 int namelen = strlen(name);
241 for (unsigned i = 0; i < params->NumParameters; i++) {
242 struct gl_program_parameter *p = &params->Parameters[i];
243 if ((strncmp(p->Name, name, namelen) == 0) &&
244 ((p->Name[namelen] == '.') || (p->Name[namelen] == '['))) {
245 loc = i;
246 break;
247 }
248 }
249 }
250
251 return loc;
252 }
253
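/* A sketch of the location scheme implemented below: samplers and images
 * each get indices from their own running counters, gl_* built-ins get a
 * (possibly freshly added) state reference in prog->Parameters, and
 * everything else is looked up in the parameter list; with
 * PackedDriverUniformStorage the parameter's value offset is used instead
 * of its index.
 */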
254 static void
255 st_nir_assign_uniform_locations(struct gl_context *ctx,
256 struct gl_program *prog,
257 struct gl_shader_program *shader_program,
258 struct exec_list *uniform_list, unsigned *size)
259 {
260 int max = 0;
261 int shaderidx = 0;
262 int imageidx = 0;
263
264 nir_foreach_variable(uniform, uniform_list) {
265 int loc;
266
267 /*
268 * UBOs (and SSBOs) have their own address spaces, so don't count them
269 * towards the number of global uniforms.
270 */
271 if ((uniform->data.mode == nir_var_uniform || uniform->data.mode == nir_var_shader_storage) &&
272 uniform->interface_type != NULL)
273 continue;
274
275 const struct glsl_type *type = glsl_without_array(uniform->type);
276 if (!uniform->data.bindless && (type->is_sampler() || type->is_image())) {
277 if (type->is_sampler()) {
278 loc = shaderidx;
279 shaderidx += type_size(uniform->type);
280 } else {
281 loc = imageidx;
282 imageidx += type_size(uniform->type);
283 }
284 } else if (strncmp(uniform->name, "gl_", 3) == 0) {
285 const gl_state_index16 *const stateTokens = uniform->state_slots[0].tokens;
286 /* This state reference has already been set up by ir_to_mesa, but we'll
287 * get the same index back here.
288 */
289
290 unsigned comps;
291 if (glsl_type_is_struct(type)) {
292 comps = 4;
293 } else {
294 comps = glsl_get_vector_elements(type);
295 }
296
297 if (ctx->Const.PackedDriverUniformStorage) {
298 loc = _mesa_add_sized_state_reference(prog->Parameters,
299 stateTokens, comps, false);
300 loc = prog->Parameters->ParameterValueOffset[loc];
301 } else {
302 loc = _mesa_add_state_reference(prog->Parameters, stateTokens);
303 }
304 } else {
305 loc = st_nir_lookup_parameter_index(prog->Parameters, uniform->name);
306
307 if (ctx->Const.PackedDriverUniformStorage) {
308 loc = prog->Parameters->ParameterValueOffset[loc];
309 }
310 }
311
312 uniform->data.driver_location = loc;
313
314 max = MAX2(max, loc + type_size(uniform->type));
315 }
316 *size = max;
317 }
318
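/* Shared NIR optimization loop, run until no pass reports further progress.
 * NIR_PASS accumulates a progress flag from each pass, while NIR_PASS_V
 * runs a pass unconditionally without tracking progress (and, in debug
 * builds, both validate the shader after the pass).
 */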
319 void
320 st_nir_opts(nir_shader *nir, bool scalar)
321 {
322 bool progress;
323 do {
324 progress = false;
325
326 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
327
328 if (scalar) {
329 NIR_PASS_V(nir, nir_lower_alu_to_scalar);
330 NIR_PASS_V(nir, nir_lower_phis_to_scalar);
331 }
332
333 NIR_PASS_V(nir, nir_lower_alu);
334 NIR_PASS_V(nir, nir_lower_pack);
335 NIR_PASS(progress, nir, nir_copy_prop);
336 NIR_PASS(progress, nir, nir_opt_remove_phis);
337 NIR_PASS(progress, nir, nir_opt_dce);
338 if (nir_opt_trivial_continues(nir)) {
339 progress = true;
340 NIR_PASS(progress, nir, nir_copy_prop);
341 NIR_PASS(progress, nir, nir_opt_dce);
342 }
343 NIR_PASS(progress, nir, nir_opt_if);
344 NIR_PASS(progress, nir, nir_opt_dead_cf);
345 NIR_PASS(progress, nir, nir_opt_cse);
346 NIR_PASS(progress, nir, nir_opt_peephole_select, 8);
347
348 NIR_PASS(progress, nir, nir_opt_algebraic);
349 NIR_PASS(progress, nir, nir_opt_constant_folding);
350
351 NIR_PASS(progress, nir, nir_opt_undef);
352 NIR_PASS(progress, nir, nir_opt_conditional_discard);
353 if (nir->options->max_unroll_iterations) {
354 NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
355 }
356 } while (progress);
357 }
358
359 /* First third of converting glsl_to_nir. This leaves things in a pre-
360 * nir_lower_io state, so that shader variants can more easily insert/
361 * replace variables, etc.
362 */
363 static nir_shader *
364 st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
365 struct gl_shader_program *shader_program,
366 gl_shader_stage stage)
367 {
368 const nir_shader_compiler_options *options =
369 st->ctx->Const.ShaderCompilerOptions[prog->info.stage].NirOptions;
370 enum pipe_shader_type type = pipe_shader_type_from_mesa(stage);
371 struct pipe_screen *screen = st->pipe->screen;
372 bool is_scalar = screen->get_shader_param(screen, type, PIPE_SHADER_CAP_SCALAR_ISA);
373 assert(options);
374
375 if (prog->nir)
376 return prog->nir;
377
378 nir_shader *nir = glsl_to_nir(shader_program, stage, options);
379 nir_lower_deref_instrs(nir, (nir_lower_deref_flags)~0);
380
381 /* Set the next shader stage hint for VS and TES. */
382 if (!nir->info.separate_shader &&
383 (nir->info.stage == MESA_SHADER_VERTEX ||
384 nir->info.stage == MESA_SHADER_TESS_EVAL)) {
385
386 unsigned prev_stages = (1 << (prog->info.stage + 1)) - 1;
387 unsigned stages_mask =
388 ~prev_stages & shader_program->data->linked_stages;
389
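/* For example (illustrative only): for the VS of a program that links
 * vertex, geometry and fragment shaders, prev_stages masks off the vertex
 * bit, stages_mask keeps the geometry and fragment bits, and the lowest
 * remaining linked stage (MESA_SHADER_GEOMETRY) becomes the next-stage
 * hint.
 */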
390 nir->info.next_stage = stages_mask ?
391 (gl_shader_stage) (ffs(stages_mask) - 1) : MESA_SHADER_FRAGMENT;
392 } else {
393 nir->info.next_stage = MESA_SHADER_FRAGMENT;
394 }
395
396 nir_variable_mode mask =
397 (nir_variable_mode) (nir_var_shader_in | nir_var_shader_out);
398 nir_remove_dead_variables(nir, mask);
399
400 if (options->lower_all_io_to_temps ||
401 nir->info.stage == MESA_SHADER_VERTEX ||
402 nir->info.stage == MESA_SHADER_GEOMETRY) {
403 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
404 nir_shader_get_entrypoint(nir),
405 true, true);
406 } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
407 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
408 nir_shader_get_entrypoint(nir),
409 true, false);
410 }
411
412 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
413 NIR_PASS_V(nir, nir_split_var_copies);
414 NIR_PASS_V(nir, nir_lower_var_copies);
415
416 st_nir_opts(nir, is_scalar);
417
418 return nir;
419 }
420
421 /* Second third of converting glsl_to_nir. This creates uniforms, gathers
422 * info on varyings, etc. after NIR link-time opts have been applied.
423 */
424 static void
425 st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog,
426 struct gl_shader_program *shader_program)
427 {
428 nir_shader *nir = prog->nir;
429
430 /* Make a pass over the IR to add state references for any built-in
431 * uniforms that are used. This has to be done now (during linking).
432 * Code generation doesn't happen until the first time this shader is
433 * used for rendering. Waiting until then to generate the parameters is
434 * too late. At that point, the values for the built-in uniforms won't
435 * get sent to the shader.
436 */
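/* For example (illustrative only): a shader that reads
 * gl_ModelViewProjectionMatrix will have a uniform whose state_slots carry
 * STATE_MVP_MATRIX tokens; each slot added below becomes a parameter whose
 * value the state tracker refreshes from current GL state before drawing.
 */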
437 nir_foreach_variable(var, &nir->uniforms) {
438 if (strncmp(var->name, "gl_", 3) == 0) {
439 const nir_state_slot *const slots = var->state_slots;
440 assert(var->state_slots != NULL);
441
442 const struct glsl_type *type = glsl_without_array(var->type);
443 for (unsigned int i = 0; i < var->num_state_slots; i++) {
444 unsigned comps;
445 if (glsl_type_is_struct(type)) {
446 /* Builtin structs require special handling; for now we just
447 * make all members vec4. See st_nir_lower_builtin.
448 */
449 comps = 4;
450 } else {
451 comps = glsl_get_vector_elements(type);
452 }
453
454 if (st->ctx->Const.PackedDriverUniformStorage) {
455 _mesa_add_sized_state_reference(prog->Parameters,
456 slots[i].tokens,
457 comps, false);
458 } else {
459 _mesa_add_state_reference(prog->Parameters,
460 slots[i].tokens);
461 }
462 }
463 }
464 }
465
466 /* Avoid reallocation of the program parameter list, because the uniform
467 * storage is only associated with the original parameter list.
468 * This should be enough for Bitmap and DrawPixels constants.
469 */
470 _mesa_reserve_parameter_storage(prog->Parameters, 8);
471
472 /* This has to be done last. Any operation that can cause
473 * prog->ParameterValues to get reallocated (e.g., anything that adds a
474 * program constant) has to happen before creating this linkage.
475 */
476 _mesa_associate_uniform_storage(st->ctx, shader_program, prog, true);
477
478 st_set_prog_affected_state_flags(prog);
479
480 NIR_PASS_V(nir, st_nir_lower_builtin);
481 NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
482
483 if (st->ctx->_Shader->Flags & GLSL_DUMP) {
484 _mesa_log("\n");
485 _mesa_log("NIR IR for linked %s program %d:\n",
486 _mesa_shader_stage_to_string(prog->info.stage),
487 shader_program->Name);
488 nir_print_shader(nir, _mesa_get_log_file());
489 _mesa_log("\n\n");
490 }
491 }
492
493 /* TODO any better helper somewhere to sort a list? */
494
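/* insert_sorted()/sort_varyings() below implement a simple insertion sort
 * that keeps variables in ascending data.location order, which is the
 * ordering st_nir_assign_var_locations() relies on (see the NOTE there).
 */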
495 static void
496 insert_sorted(struct exec_list *var_list, nir_variable *new_var)
497 {
498 nir_foreach_variable(var, var_list) {
499 if (var->data.location > new_var->data.location) {
500 exec_node_insert_node_before(&var->node, &new_var->node);
501 return;
502 }
503 }
504 exec_list_push_tail(var_list, &new_var->node);
505 }
506
507 static void
508 sort_varyings(struct exec_list *var_list)
509 {
510 struct exec_list new_list;
511 exec_list_make_empty(&new_list);
512 nir_foreach_variable_safe(var, var_list) {
513 exec_node_remove(&var->node);
514 insert_sorted(&new_list, var);
515 }
516 exec_list_move_nodes_to(&new_list, var_list);
517 }
518
519 static void
520 set_st_program(struct gl_program *prog,
521 struct gl_shader_program *shader_program,
522 nir_shader *nir)
523 {
524 struct st_vertex_program *stvp;
525 struct st_common_program *stp;
526 struct st_fragment_program *stfp;
527 struct st_compute_program *stcp;
528
529 switch (prog->info.stage) {
530 case MESA_SHADER_VERTEX:
531 stvp = (struct st_vertex_program *)prog;
532 stvp->shader_program = shader_program;
533 stvp->tgsi.type = PIPE_SHADER_IR_NIR;
534 stvp->tgsi.ir.nir = nir;
535 break;
536 case MESA_SHADER_GEOMETRY:
537 case MESA_SHADER_TESS_CTRL:
538 case MESA_SHADER_TESS_EVAL:
539 stp = (struct st_common_program *)prog;
540 stp->shader_program = shader_program;
541 stp->tgsi.type = PIPE_SHADER_IR_NIR;
542 stp->tgsi.ir.nir = nir;
543 break;
544 case MESA_SHADER_FRAGMENT:
545 stfp = (struct st_fragment_program *)prog;
546 stfp->shader_program = shader_program;
547 stfp->tgsi.type = PIPE_SHADER_IR_NIR;
548 stfp->tgsi.ir.nir = nir;
549 break;
550 case MESA_SHADER_COMPUTE:
551 stcp = (struct st_compute_program *)prog;
552 stcp->shader_program = shader_program;
553 stcp->tgsi.ir_type = PIPE_SHADER_IR_NIR;
554 stcp->tgsi.prog = nir;
555 break;
556 default:
557 unreachable("unknown shader stage");
558 }
559 }
560
561 static void
562 st_nir_get_mesa_program(struct gl_context *ctx,
563 struct gl_shader_program *shader_program,
564 struct gl_linked_shader *shader)
565 {
566 struct st_context *st = st_context(ctx);
567 struct gl_program *prog;
568
569 validate_ir_tree(shader->ir);
570
571 prog = shader->Program;
572
573 prog->Parameters = _mesa_new_parameter_list();
574
575 _mesa_copy_linked_program_data(shader_program, shader);
576 _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
577 prog->Parameters);
578
579 if (ctx->_Shader->Flags & GLSL_DUMP) {
580 _mesa_log("\n");
581 _mesa_log("GLSL IR for linked %s program %d:\n",
582 _mesa_shader_stage_to_string(shader->Stage),
583 shader_program->Name);
584 _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
585 _mesa_log("\n\n");
586 }
587
588 prog->ExternalSamplersUsed = gl_external_samplers(prog);
589 _mesa_update_shader_textures_used(shader_program, prog);
590
591 nir_shader *nir = st_glsl_to_nir(st, prog, shader_program, shader->Stage);
592
593 set_st_program(prog, shader_program, nir);
594 prog->nir = nir;
595 }
596
597 static void
598 st_nir_link_shaders(nir_shader **producer, nir_shader **consumer, bool scalar)
599 {
600 nir_lower_io_arrays_to_elements(*producer, *consumer);
601
602 NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
603 NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);
604
605 if (nir_remove_unused_varyings(*producer, *consumer)) {
606 NIR_PASS_V(*producer, nir_lower_global_vars_to_local);
607 NIR_PASS_V(*consumer, nir_lower_global_vars_to_local);
608
609 /* The backend might not be able to handle indirects on
610 * temporaries so we need to lower indirects on any of the
611 * varyings we have demoted here.
612 *
613 * TODO: radeonsi shouldn't need to do this, however LLVM isn't
614 * currently smart enough to handle indirects without causing excess
616 * spilling, which can cause the GPU to hang.
616 *
617 * See the following thread for more details of the problem:
618 * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
619 */
620 nir_variable_mode indirect_mask = nir_var_local;
621
622 NIR_PASS_V(*producer, nir_lower_indirect_derefs, indirect_mask);
623 NIR_PASS_V(*consumer, nir_lower_indirect_derefs, indirect_mask);
624
625 st_nir_opts(*producer, scalar);
626 st_nir_opts(*consumer, scalar);
627 }
628 }
629
630 extern "C" {
631
632 bool
633 st_link_nir(struct gl_context *ctx,
634 struct gl_shader_program *shader_program)
635 {
636 struct st_context *st = st_context(ctx);
637 struct pipe_screen *screen = st->pipe->screen;
638 bool is_scalar[MESA_SHADER_STAGES];
639
640 /* Determine scalar property of each shader stage */
641 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
642 struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
643 enum pipe_shader_type type;
644
645 if (shader == NULL)
646 continue;
647
648 type = pipe_shader_type_from_mesa(shader->Stage);
649 is_scalar[i] = screen->get_shader_param(screen, type, PIPE_SHADER_CAP_SCALAR_ISA);
650 }
651
652 /* Determine first and last stage. */
653 unsigned first = MESA_SHADER_STAGES;
654 unsigned last = 0;
655 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
656 if (!shader_program->_LinkedShaders[i])
657 continue;
658 if (first == MESA_SHADER_STAGES)
659 first = i;
660 last = i;
661 }
662
663 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
664 struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
665 if (shader == NULL)
666 continue;
667
668 st_nir_get_mesa_program(ctx, shader_program, shader);
669
670 nir_variable_mode mask = (nir_variable_mode) 0;
671 if (i != first)
672 mask = (nir_variable_mode)(mask | nir_var_shader_in);
673
674 if (i != last)
675 mask = (nir_variable_mode)(mask | nir_var_shader_out);
676
677 nir_shader *nir = shader->Program->nir;
678 NIR_PASS_V(nir, nir_lower_io_to_scalar_early, mask);
679 st_nir_opts(nir, is_scalar[i]);
680 }
681
682 /* Linking the stages in the opposite order (from fragment to vertex)
683 * ensures that inter-shader outputs written to in an earlier stage
684 * are eliminated if they are (transitively) not used in a later
685 * stage.
686 */
687 int next = last;
688 for (int i = next - 1; i >= 0; i--) {
689 struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
690 if (shader == NULL)
691 continue;
692
693 st_nir_link_shaders(&shader->Program->nir,
694 &shader_program->_LinkedShaders[next]->Program->nir,
695 is_scalar[i]);
696 next = i;
697 }
698
699 int prev = -1;
700 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
701 struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
702 if (shader == NULL)
703 continue;
704
705 nir_shader *nir = shader->Program->nir;
706
707 /* fragment shaders may need the gl_FragCoord y-transform lowering: */
708 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
709 static const gl_state_index16 wposTransformState[STATE_LENGTH] = {
710 STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
711 };
712 nir_lower_wpos_ytransform_options wpos_options = { { 0 } };
713 struct pipe_screen *pscreen = st->pipe->screen;
714
715 memcpy(wpos_options.state_tokens, wposTransformState,
716 sizeof(wpos_options.state_tokens));
717 wpos_options.fs_coord_origin_upper_left =
718 pscreen->get_param(pscreen,
719 PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
720 wpos_options.fs_coord_origin_lower_left =
721 pscreen->get_param(pscreen,
722 PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
723 wpos_options.fs_coord_pixel_center_integer =
724 pscreen->get_param(pscreen,
725 PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
726 wpos_options.fs_coord_pixel_center_half_integer =
727 pscreen->get_param(pscreen,
728 PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
729
730 if (nir_lower_wpos_ytransform(nir, &wpos_options)) {
731 nir_validate_shader(nir);
732 _mesa_add_state_reference(shader->Program->Parameters,
733 wposTransformState);
734 }
735 }
736
737 NIR_PASS_V(nir, nir_lower_system_values);
738
739 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
740 shader->Program->info = nir->info;
741
742 if (prev != -1) {
743 nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir,
744 nir, ctx->API != API_OPENGL_COMPAT);
745 }
746 prev = i;
747 }
748
749 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
750 struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
751 if (shader == NULL)
752 continue;
753
754 st_glsl_to_nir_post_opts(st, shader->Program, shader_program);
755
756 assert(shader->Program);
757 if (!ctx->Driver.ProgramStringNotify(ctx,
758 _mesa_shader_stage_to_program(i),
759 shader->Program)) {
760 _mesa_reference_program(ctx, &shader->Program, NULL);
761 return false;
762 }
763 }
764
765 return true;
766 }
767
768 /* Last third of preparing nir from glsl, which happens after shader
769 * variant lowering.
770 */
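/* The overall flow in this file, roughly: st_link_nir() runs
 * st_nir_get_mesa_program()/st_glsl_to_nir() for each stage, links the
 * stages with st_nir_link_shaders(), then runs st_glsl_to_nir_post_opts();
 * st_finalize_nir() below is called later, per shader variant, once the
 * variant-specific lowering has been applied.
 */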
771 void
772 st_finalize_nir(struct st_context *st, struct gl_program *prog,
773 struct gl_shader_program *shader_program, nir_shader *nir)
774 {
775 struct pipe_screen *screen = st->pipe->screen;
776 const nir_shader_compiler_options *options =
777 st->ctx->Const.ShaderCompilerOptions[prog->info.stage].NirOptions;
778
779 NIR_PASS_V(nir, nir_split_var_copies);
780 NIR_PASS_V(nir, nir_lower_var_copies);
781 if (options->lower_all_io_to_temps ||
782 nir->info.stage == MESA_SHADER_VERTEX ||
783 nir->info.stage == MESA_SHADER_GEOMETRY) {
784 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
785 } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
786 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
787 }
788
789 if (nir->info.stage == MESA_SHADER_VERTEX) {
790 /* Needs special handling so drvloc matches the vbo state: */
791 st_nir_assign_vs_in_locations(prog, nir);
792 /* Re-lower global vars, to deal with any dead VS inputs. */
793 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
794
795 sort_varyings(&nir->outputs);
796 st_nir_assign_var_locations(&nir->outputs,
797 &nir->num_outputs,
798 nir->info.stage);
799 st_nir_fixup_varying_slots(st, &nir->outputs);
800 } else if (nir->info.stage == MESA_SHADER_GEOMETRY ||
801 nir->info.stage == MESA_SHADER_TESS_CTRL ||
802 nir->info.stage == MESA_SHADER_TESS_EVAL) {
803 sort_varyings(&nir->inputs);
804 st_nir_assign_var_locations(&nir->inputs,
805 &nir->num_inputs,
806 nir->info.stage);
807 st_nir_fixup_varying_slots(st, &nir->inputs);
808
809 sort_varyings(&nir->outputs);
810 st_nir_assign_var_locations(&nir->outputs,
811 &nir->num_outputs,
812 nir->info.stage);
813 st_nir_fixup_varying_slots(st, &nir->outputs);
814 } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
815 sort_varyings(&nir->inputs);
816 st_nir_assign_var_locations(&nir->inputs,
817 &nir->num_inputs,
818 nir->info.stage);
819 st_nir_fixup_varying_slots(st, &nir->inputs);
820 st_nir_assign_var_locations(&nir->outputs,
821 &nir->num_outputs,
822 nir->info.stage);
823 } else if (nir->info.stage == MESA_SHADER_COMPUTE) {
824 /* TODO? */
825 } else {
826 unreachable("invalid shader type for tgsi bypass\n");
827 }
828
829 NIR_PASS_V(nir, nir_lower_atomics_to_ssbo,
830 st->ctx->Const.Program[nir->info.stage].MaxAtomicBuffers);
831
832 st_nir_assign_uniform_locations(st->ctx, prog, shader_program,
833 &nir->uniforms, &nir->num_uniforms);
834
835 if (st->ctx->Const.PackedDriverUniformStorage) {
836 NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_glsl_type_dword_size,
837 (nir_lower_io_options)0);
838 NIR_PASS_V(nir, st_nir_lower_uniforms_to_ubo);
839 }
840
841 if (screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF))
842 NIR_PASS_V(nir, gl_nir_lower_samplers_as_deref, shader_program);
843 else
844 NIR_PASS_V(nir, gl_nir_lower_samplers, shader_program);
845 }
846
847 } /* extern "C" */