nir: Use the flrp lowering pass instead of nir_opt_algebraic
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 303b36e9467ae58428f8a506aa57f0c624a88366..0a67d4532eba88302630128c0609a27d70d95bbf 100644
 #include "main/errors.h"
 #include "main/shaderapi.h"
 #include "main/uniforms.h"
-#include "util/string_to_uint_map.h"
 
+#include "main/shaderobj.h"
 #include "st_context.h"
-#include "st_program.h"
 #include "st_glsl_types.h"
+#include "st_program.h"
 
 #include "compiler/nir/nir.h"
 #include "compiler/glsl_types.h"
 #include "compiler/glsl/glsl_to_nir.h"
+#include "compiler/glsl/gl_nir.h"
 #include "compiler/glsl/ir.h"
+#include "compiler/glsl/ir_optimization.h"
+#include "compiler/glsl/string_to_uint_map.h"
 
+static int
+type_size(const struct glsl_type *type)
+{
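+   /* Measures a type in vec4-sized attribute slots: scalars and vectors
+    * take one slot (dvec3/dvec4 can take two), a matrix takes one slot per
+    * column, and arrays multiply by their length.
+    */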
+   return type->count_attribute_slots(false);
+}
 
 /* Depending on PIPE_CAP_TGSI_TEXCOORD (st->needs_texcoord_semantic) we
  * may need to fix up varying slots so the glsl->nir path is aligned
@@ -73,32 +81,20 @@ st_nir_fixup_varying_slots(struct st_context *st, struct exec_list *var_list)
  * on varying-slot w/ the VS outputs)
  */
 static void
-st_nir_assign_vs_in_locations(struct gl_program *prog, nir_shader *nir)
+st_nir_assign_vs_in_locations(nir_shader *nir)
 {
-   unsigned attr, num_inputs = 0;
-   unsigned input_to_index[VERT_ATTRIB_MAX] = {0};
-
-   /* TODO de-duplicate w/ similar code in st_translate_vertex_program()? */
-   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
-      if ((prog->InputsRead & BITFIELD64_BIT(attr)) != 0) {
-         input_to_index[attr] = num_inputs;
-         num_inputs++;
-         if ((prog->DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) {
-            /* add placeholder for second part of a double attribute */
-            num_inputs++;
-         }
-      } else {
-         input_to_index[attr] = ~0;
-      }
-   }
-
    nir->num_inputs = 0;
    nir_foreach_variable_safe(var, &nir->inputs) {
-      attr = var->data.location;
-      assert(attr < ARRAY_SIZE(input_to_index));
-
-      if (input_to_index[attr] != ~0u) {
-         var->data.driver_location = input_to_index[attr];
+      /* NIR already assigns dual-slot inputs to two locations so all we have
+       * to do is compact everything down.
+       */
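+      /* For example, if inputs_read has bits 0, 1 and 4 set, the input at
+       * location 4 gets driver_location util_bitcount64(0b10011 & 0b01111)
+       * = 2, i.e. the number of enabled attributes below it.
+       */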
+      if (var->data.location == VERT_ATTRIB_EDGEFLAG) {
+         /* bit of a hack, mirroring st_translate_vertex_program */
+         var->data.driver_location = util_bitcount64(nir->info.inputs_read);
+      } else if (nir->info.inputs_read & BITFIELD64_BIT(var->data.location)) {
+         var->data.driver_location =
+            util_bitcount64(nir->info.inputs_read &
+                              BITFIELD64_MASK(var->data.location));
          nir->num_inputs++;
       } else {
          /* Move unused input variables to the globals list (with no
@@ -107,12 +103,92 @@ st_nir_assign_vs_in_locations(struct gl_program *prog, nir_shader *nir)
           * set.
           */
          exec_node_remove(&var->node);
-         var->data.mode = nir_var_global;
+         var->data.mode = nir_var_shader_temp;
          exec_list_push_tail(&nir->globals, &var->node);
       }
    }
 }
 
+static void
+st_nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
+                            gl_shader_stage stage)
+{
+   unsigned location = 0;
+   unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
+   uint64_t processed_locs[2] = {0};
+
+   const int base = stage == MESA_SHADER_FRAGMENT ?
+      (int) FRAG_RESULT_DATA0 : (int) VARYING_SLOT_VAR0;
+
+   int UNUSED last_loc = 0;
+   nir_foreach_variable(var, var_list) {
+
+      const struct glsl_type *type = var->type;
+      if (nir_is_per_vertex_io(var, stage)) {
+         assert(glsl_type_is_array(type));
+         type = glsl_get_array_element(type);
+      }
+
+      unsigned var_size = type_size(type);
+
+      /* Builtins don't allow component packing so we only need to worry about
+       * user defined varyings sharing the same location.
+       */
+      bool processed = false;
+      if (var->data.location >= base) {
+         unsigned glsl_location = var->data.location - base;
+
+         for (unsigned i = 0; i < var_size; i++) {
+            if (processed_locs[var->data.index] &
+                ((uint64_t)1 << (glsl_location + i)))
+               processed = true;
+            else
+               processed_locs[var->data.index] |=
+                  ((uint64_t)1 << (glsl_location + i));
+         }
+      }
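+      /* processed_locs is indexed by var->data.index, which should be 0 for
+       * everything except dual-source-blend outputs, where index 1 tracks
+       * its own set of slots.
+       */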
+
+      /* Because component packing allows varyings to share the same
+       * location, we may have already processed this location.
+       */
+      if (processed) {
+         unsigned driver_location = assigned_locations[var->data.location];
+         var->data.driver_location = driver_location;
+         *size += type_size(type);
+
+         /* An array may be packed such that it crosses multiple other arrays
+          * or variables; we need to make sure we have allocated the elements
+          * consecutively if the previously processed var was shorter than
+          * the current array we are processing.
+          *
+          * NOTE: The code below assumes the var list is ordered in ascending
+          * location order.
+          */
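+         /* Example: a previously packed var at location 30 with var_size 1
+          * was given driver slot 5, and the next free slot ("location") is
+          * 6.  If the current var also starts at location 30 but has
+          * var_size 3, slot 5 is reused for location 30, and since
+          * last_slot_location = 8 > 6 the loop below hands out fresh slots
+          * 6 and 7 for locations 31 and 32.
+          */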
+         assert(last_loc <= var->data.location);
+         last_loc = var->data.location;
+         unsigned last_slot_location = driver_location + var_size;
+         if (last_slot_location > location) {
+            unsigned num_unallocated_slots = last_slot_location - location;
+            unsigned first_unallocated_slot = var_size - num_unallocated_slots;
+            for (unsigned i = first_unallocated_slot; i < var_size; i++) {
+               assigned_locations[var->data.location + i] = location;
+               location++;
+            }
+         }
+         continue;
+      }
+
+      for (unsigned i = 0; i < var_size; i++) {
+         assigned_locations[var->data.location + i] = location + i;
+      }
+
+      var->data.driver_location = location;
+      location += var_size;
+   }
+
+   *size += location;
+}
+
 static int
 st_nir_lookup_parameter_index(const struct gl_program_parameter_list *params,
                               const char *name)
@@ -162,12 +238,12 @@ st_nir_lookup_parameter_index(const struct gl_program_parameter_list *params,
 }
 
 static void
-st_nir_assign_uniform_locations(struct gl_program *prog,
-                                struct gl_shader_program *shader_program,
-                                struct exec_list *uniform_list, unsigned *size)
+st_nir_assign_uniform_locations(struct gl_context *ctx,
+                                struct gl_program *prog,
+                                struct exec_list *uniform_list)
 {
-   int max = 0;
    int shaderidx = 0;
+   int imageidx = 0;
 
    nir_foreach_variable(uniform, uniform_list) {
       int loc;
@@ -176,107 +252,297 @@ st_nir_assign_uniform_locations(struct gl_program *prog,
        * UBOs have their own address spaces, so don't count them towards the
        * number of global uniforms.
        */
-      if ((uniform->data.mode == nir_var_uniform || uniform->data.mode == nir_var_shader_storage) &&
-          uniform->interface_type != NULL)
+      if (uniform->data.mode == nir_var_mem_ubo || uniform->data.mode == nir_var_mem_ssbo)
          continue;
 
-      if (uniform->type->is_sampler()) {
-         unsigned val;
-         bool found = shader_program->UniformHash->get(val, uniform->name);
-         loc = shaderidx++;
-         assert(found);
-         /* this ensure that nir_lower_samplers looks at the correct
-          * shader_program->UniformStorage[location]:
-          */
-         uniform->data.location = val;
+      const struct glsl_type *type = glsl_without_array(uniform->type);
+      if (!uniform->data.bindless && (type->is_sampler() || type->is_image())) {
+         if (type->is_sampler()) {
+            loc = shaderidx;
+            shaderidx += type_size(uniform->type);
+         } else {
+            loc = imageidx;
+            imageidx += type_size(uniform->type);
+         }
       } else if (strncmp(uniform->name, "gl_", 3) == 0) {
-         const gl_state_index *const stateTokens = (gl_state_index *)uniform->state_slots[0].tokens;
+         const gl_state_index16 *const stateTokens = uniform->state_slots[0].tokens;
          /* This state reference has already been set up by ir_to_mesa, but we'll
           * get the same index back here.
           */
-         loc = _mesa_add_state_reference(prog->Parameters, stateTokens);
+
+         unsigned comps;
+         if (glsl_type_is_struct_or_ifc(type)) {
+            comps = 4;
+         } else {
+            comps = glsl_get_vector_elements(type);
+         }
+
+         if (ctx->Const.PackedDriverUniformStorage) {
+            loc = _mesa_add_sized_state_reference(prog->Parameters,
+                                                  stateTokens, comps, false);
+            loc = prog->Parameters->ParameterValueOffset[loc];
+         } else {
+            loc = _mesa_add_state_reference(prog->Parameters, stateTokens);
+         }
       } else {
          loc = st_nir_lookup_parameter_index(prog->Parameters, uniform->name);
+
+         /* We need to check that loc is not -1 here before accessing the
+          * array. It can be negative for example when we have a struct that
+          * only contains opaque types.
+          */
+         if (loc >= 0 && ctx->Const.PackedDriverUniformStorage) {
+            loc = prog->Parameters->ParameterValueOffset[loc];
+         }
       }
 
       uniform->data.driver_location = loc;
-
-      max = MAX2(max, loc + st_glsl_type_size(uniform->type));
    }
-   *size = max;
 }
 
-extern "C" {
+void
+st_nir_opts(nir_shader *nir, bool scalar)
+{
+   bool progress;
+   unsigned lower_flrp =
+      (nir->options->lower_flrp16 ? 16 : 0) |
+      (nir->options->lower_flrp32 ? 32 : 0) |
+      (nir->options->lower_flrp64 ? 64 : 0);
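+   /* The mask built above selects which flrp bit sizes need lowering; e.g.
+    * lower_flrp32 plus lower_flrp64 yields 32 | 64 = 96, asking
+    * nir_lower_flrp to expand only the 32- and 64-bit flrp instructions.
+    */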
+
+   do {
+      progress = false;
+
+      NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+
+      if (scalar) {
+         NIR_PASS_V(nir, nir_lower_alu_to_scalar);
+         NIR_PASS_V(nir, nir_lower_phis_to_scalar);
+      }
+
+      NIR_PASS_V(nir, nir_lower_alu);
+      NIR_PASS_V(nir, nir_lower_pack);
+      NIR_PASS(progress, nir, nir_copy_prop);
+      NIR_PASS(progress, nir, nir_opt_remove_phis);
+      NIR_PASS(progress, nir, nir_opt_dce);
+      if (nir_opt_trivial_continues(nir)) {
+         progress = true;
+         NIR_PASS(progress, nir, nir_copy_prop);
+         NIR_PASS(progress, nir, nir_opt_dce);
+      }
+      NIR_PASS(progress, nir, nir_opt_if, false);
+      NIR_PASS(progress, nir, nir_opt_dead_cf);
+      NIR_PASS(progress, nir, nir_opt_cse);
+      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
+
+      NIR_PASS(progress, nir, nir_opt_algebraic);
+      NIR_PASS(progress, nir, nir_opt_constant_folding);
+
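+      /* nir_lower_flrp expands flrp(a, b, c) into adds and multiplies
+       * (roughly a + c * (b - a), or an fma-based form where available)
+       * instead of relying on nir_opt_algebraic rules, which lets the pass
+       * pick the cheapest sufficiently precise expansion per instruction.
+       */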
+      if (lower_flrp != 0) {
+         bool lower_flrp_progress = false;
+
+         NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp,
+                  lower_flrp,
+                  false /* always_precise */,
+                  nir->options->lower_ffma);
+         if (lower_flrp_progress) {
+            NIR_PASS(progress, nir,
+                     nir_opt_constant_folding);
+            progress = true;
+         }
+
+         /* Nothing should rematerialize any flrps, so we only need to do this
+          * lowering once.
+          */
+         lower_flrp = 0;
+      }
 
-/* First half of converting glsl_to_nir.. this leaves things in a pre-
+      NIR_PASS(progress, nir, nir_opt_undef);
+      NIR_PASS(progress, nir, nir_opt_conditional_discard);
+      if (nir->options->max_unroll_iterations) {
+         NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
+      }
+   } while (progress);
+}
+
+/* First third of converting glsl_to_nir.  This leaves things in a pre-
  * nir_lower_io state, so that shader variants can more easily insert/
  * replace variables, etc.
  */
-nir_shader *
+static nir_shader *
 st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
                struct gl_shader_program *shader_program,
                gl_shader_stage stage)
 {
-   struct pipe_screen *pscreen = st->pipe->screen;
-   enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(stage);
-   const nir_shader_compiler_options *options;
-   nir_shader *nir;
-
-   assert(pscreen->get_compiler_options);   /* drivers using NIR must implement this */
-
-   options = (const nir_shader_compiler_options *)
-      pscreen->get_compiler_options(pscreen, PIPE_SHADER_IR_NIR, ptarget);
+   const nir_shader_compiler_options *options =
+      st->ctx->Const.ShaderCompilerOptions[prog->info.stage].NirOptions;
+   enum pipe_shader_type type = pipe_shader_type_from_mesa(stage);
+   struct pipe_screen *screen = st->pipe->screen;
+   bool is_scalar = screen->get_shader_param(screen, type, PIPE_SHADER_CAP_SCALAR_ISA);
    assert(options);
+   bool lower_64bit =
+      options->lower_int64_options || options->lower_doubles_options;
 
    if (prog->nir)
       return prog->nir;
 
-   nir = glsl_to_nir(shader_program, stage, options);
-   prog->nir = nir;
+   nir_shader *nir = glsl_to_nir(st->ctx, shader_program, stage, options);
+
+   /* Set the next shader stage hint for VS and TES. */
+   if (!nir->info.separate_shader &&
+       (nir->info.stage == MESA_SHADER_VERTEX ||
+        nir->info.stage == MESA_SHADER_TESS_EVAL)) {
+
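+      /* Pick the lowest linked stage after this one.  E.g. for a VS
+       * (stage 0) linked with an FS, prev_stages = 0b1, stages_mask keeps
+       * only the FS bit, and u_bit_scan returns MESA_SHADER_FRAGMENT.
+       */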
+      unsigned prev_stages = (1 << (prog->info.stage + 1)) - 1;
+      unsigned stages_mask =
+         ~prev_stages & shader_program->data->linked_stages;
+
+      nir->info.next_stage = stages_mask ?
+         (gl_shader_stage) u_bit_scan(&stages_mask) : MESA_SHADER_FRAGMENT;
+   } else {
+      nir->info.next_stage = MESA_SHADER_FRAGMENT;
+   }
+
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+   nir_shader *softfp64 = NULL;
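+   /* For full software fp64, build the helper shader that holds the
+    * soft-fp64 routines up front; nir_lower_doubles pulls functions out of
+    * it later.  ralloc_steal keeps it alive as long as nir's own context.
+    */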
+   if (nir->info.uses_64bit &&
+       (options->lower_doubles_options & nir_lower_fp64_full_software) != 0) {
+      softfp64 = glsl_float64_funcs_to_nir(st->ctx, options);
+      ralloc_steal(ralloc_parent(nir), softfp64);
+   }
+
+   nir_variable_mode mask =
+      (nir_variable_mode) (nir_var_shader_in | nir_var_shader_out);
+   nir_remove_dead_variables(nir, mask);
+
+   if (options->lower_all_io_to_temps ||
+       nir->info.stage == MESA_SHADER_VERTEX ||
+       nir->info.stage == MESA_SHADER_GEOMETRY) {
+      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+                 nir_shader_get_entrypoint(nir),
+                 true, true);
+   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+                 nir_shader_get_entrypoint(nir),
+                 true, false);
+   }
 
-   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
-         nir_shader_get_entrypoint(nir),
-         true, true);
    NIR_PASS_V(nir, nir_lower_global_vars_to_local);
    NIR_PASS_V(nir, nir_split_var_copies);
    NIR_PASS_V(nir, nir_lower_var_copies);
-   NIR_PASS_V(nir, st_nir_lower_builtin);
 
-   /* fragment shaders may need : */
-   if (stage == MESA_SHADER_FRAGMENT) {
-      static const gl_state_index wposTransformState[STATE_LENGTH] = {
-         STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
-      };
-      nir_lower_wpos_ytransform_options wpos_options = {0};
-      struct pipe_screen *pscreen = st->pipe->screen;
-
-      memcpy(wpos_options.state_tokens, wposTransformState,
-             sizeof(wpos_options.state_tokens));
-      wpos_options.fs_coord_origin_upper_left =
-         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
-      wpos_options.fs_coord_origin_lower_left =
-         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
-      wpos_options.fs_coord_pixel_center_integer =
-         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
-      wpos_options.fs_coord_pixel_center_half_integer =
-         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
-
-      if (nir_lower_wpos_ytransform(nir, &wpos_options)) {
-         nir_validate_shader(nir);
-         _mesa_add_state_reference(prog->Parameters, wposTransformState);
+   if (is_scalar) {
+      NIR_PASS_V(nir, nir_lower_alu_to_scalar);
+   }
+
+   /* This must run before gl_nir_lower_buffers and before the
+    * nir_lower_vars_to_ssa done inside st_nir_opts(). */
+   NIR_PASS_V(nir, gl_nir_lower_bindless_images);
+   st_nir_opts(nir, is_scalar);
+
+   NIR_PASS_V(nir, gl_nir_lower_buffers, shader_program);
+   /* Do a round of constant folding to clean up address calculations */
+   NIR_PASS_V(nir, nir_opt_constant_folding);
+
+   if (lower_64bit) {
+      bool lowered_64bit_ops = false;
+      bool progress = false;
+
+      NIR_PASS_V(nir, nir_opt_algebraic);
+
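+      /* Iterate to a fixed point: int64/double lowering can create new
+       * algebraic opportunities, and nir_opt_algebraic can in turn expose
+       * more 64-bit ops that still need lowering.
+       */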
+      do {
+         progress = false;
+         if (options->lower_int64_options) {
+            NIR_PASS(progress, nir, nir_lower_int64,
+                     options->lower_int64_options);
+         }
+         if (options->lower_doubles_options) {
+            NIR_PASS(progress, nir, nir_lower_doubles,
+                     softfp64, options->lower_doubles_options);
+         }
+         NIR_PASS(progress, nir, nir_opt_algebraic);
+         lowered_64bit_ops |= progress;
+      } while (progress);
+
+      if (lowered_64bit_ops)
+         st_nir_opts(nir, is_scalar);
+   }
+
+   return nir;
+}
+
+/* Second third of converting glsl_to_nir. This creates uniforms, gathers
+ * info on varyings, etc. after NIR link-time opts have been applied.
+ */
+static void
+st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog,
+                         struct gl_shader_program *shader_program)
+{
+   nir_shader *nir = prog->nir;
+
+   /* Make a pass over the IR to add state references for any built-in
+    * uniforms that are used.  This has to be done now (during linking).
+    * Code generation doesn't happen until the first time this shader is
+    * used for rendering.  Waiting until then to generate the parameters is
+    * too late.  At that point, the values for the built-in uniforms won't
+    * get sent to the shader.
+    */
+   nir_foreach_variable(var, &nir->uniforms) {
+      if (strncmp(var->name, "gl_", 3) == 0) {
+         const nir_state_slot *const slots = var->state_slots;
+         assert(var->state_slots != NULL);
+
+         const struct glsl_type *type = glsl_without_array(var->type);
+         for (unsigned int i = 0; i < var->num_state_slots; i++) {
+            unsigned comps;
+            if (glsl_type_is_struct_or_ifc(type)) {
+               /* Builtin structs require special handling; for now we just
+                * make all members vec4.  See st_nir_lower_builtin.
+                */
+               comps = 4;
+            } else {
+               comps = glsl_get_vector_elements(type);
+            }
+
+            if (st->ctx->Const.PackedDriverUniformStorage) {
+               _mesa_add_sized_state_reference(prog->Parameters,
+                                               slots[i].tokens,
+                                               comps, false);
+            } else {
+               _mesa_add_state_reference(prog->Parameters,
+                                         slots[i].tokens);
+            }
+         }
       }
    }
 
+   /* Avoid reallocation of the program parameter list, because the uniform
+    * storage is only associated with the original parameter list.
+    * This should be enough for Bitmap and DrawPixels constants.
+    */
+   _mesa_reserve_parameter_storage(prog->Parameters, 8);
+
+   /* This has to be done last.  Any operation that can cause
+    * prog->ParameterValues to get reallocated (e.g., anything that adds a
+    * program constant) has to happen before creating this linkage.
+    */
+   _mesa_associate_uniform_storage(st->ctx, shader_program, prog, true);
+
+   st_set_prog_affected_state_flags(prog);
+
+   NIR_PASS_V(nir, st_nir_lower_builtin);
+   NIR_PASS_V(nir, gl_nir_lower_atomics, shader_program, true);
+   NIR_PASS_V(nir, nir_opt_intrinsics);
+
+   nir_variable_mode mask = nir_var_function_temp;
+   nir_remove_dead_variables(nir, mask);
+
    if (st->ctx->_Shader->Flags & GLSL_DUMP) {
       _mesa_log("\n");
       _mesa_log("NIR IR for linked %s program %d:\n",
-             _mesa_shader_stage_to_string(stage),
+             _mesa_shader_stage_to_string(prog->info.stage),
              shader_program->Name);
       nir_print_shader(nir, _mesa_get_log_file());
       _mesa_log("\n\n");
    }
-
-   return nir;
 }
 
 /* TODO any better helper somewhere to sort a list? */
@@ -305,111 +571,70 @@ sort_varyings(struct exec_list *var_list)
    exec_list_move_nodes_to(&new_list, var_list);
 }
 
-/* Second half of preparing nir from glsl, which happens after shader
- * variant lowering.
- */
-void
-st_finalize_nir(struct st_context *st, struct gl_program *prog, nir_shader *nir)
+static void
+set_st_program(struct gl_program *prog,
+               struct gl_shader_program *shader_program,
+               nir_shader *nir)
 {
-   NIR_PASS_V(nir, nir_split_var_copies);
-   NIR_PASS_V(nir, nir_lower_var_copies);
-   NIR_PASS_V(nir, nir_lower_io_types);
-
-   if (nir->stage == MESA_SHADER_VERTEX) {
-      /* Needs special handling so drvloc matches the vbo state: */
-      st_nir_assign_vs_in_locations(prog, nir);
-      /* Re-lower global vars, to deal with any dead VS inputs. */
-      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
-
-      sort_varyings(&nir->outputs);
-      nir_assign_var_locations(&nir->outputs,
-                               &nir->num_outputs,
-                               VARYING_SLOT_VAR0,
-                               st_glsl_type_size);
-      st_nir_fixup_varying_slots(st, &nir->outputs);
-   } else if (nir->stage == MESA_SHADER_FRAGMENT) {
-      sort_varyings(&nir->inputs);
-      nir_assign_var_locations(&nir->inputs,
-                               &nir->num_inputs,
-                               VARYING_SLOT_VAR0,
-                               st_glsl_type_size);
-      st_nir_fixup_varying_slots(st, &nir->inputs);
-      nir_assign_var_locations(&nir->outputs,
-                               &nir->num_outputs,
-                               FRAG_RESULT_DATA0,
-                               st_glsl_type_size);
-   } else {
-      unreachable("invalid shader type for tgsi bypass\n");
-   }
+   struct st_vertex_program *stvp;
+   struct st_common_program *stp;
+   struct st_fragment_program *stfp;
+   struct st_compute_program *stcp;
 
-   struct gl_shader_program *shader_program;
-   switch (nir->stage) {
+   switch (prog->info.stage) {
    case MESA_SHADER_VERTEX:
-      shader_program = ((struct st_vertex_program *)prog)->shader_program;
+      stvp = (struct st_vertex_program *)prog;
+      stvp->shader_program = shader_program;
+      stvp->tgsi.type = PIPE_SHADER_IR_NIR;
+      stvp->tgsi.ir.nir = nir;
+      break;
+   case MESA_SHADER_GEOMETRY:
+   case MESA_SHADER_TESS_CTRL:
+   case MESA_SHADER_TESS_EVAL:
+      stp = (struct st_common_program *)prog;
+      stp->shader_program = shader_program;
+      stp->tgsi.type = PIPE_SHADER_IR_NIR;
+      stp->tgsi.ir.nir = nir;
       break;
    case MESA_SHADER_FRAGMENT:
-      shader_program = ((struct st_fragment_program *)prog)->shader_program;
+      stfp = (struct st_fragment_program *)prog;
+      stfp->shader_program = shader_program;
+      stfp->tgsi.type = PIPE_SHADER_IR_NIR;
+      stfp->tgsi.ir.nir = nir;
+      break;
+   case MESA_SHADER_COMPUTE:
+      stcp = (struct st_compute_program *)prog;
+      stcp->shader_program = shader_program;
+      stcp->tgsi.ir_type = PIPE_SHADER_IR_NIR;
+      stcp->tgsi.prog = nir;
       break;
    default:
-      assert(!"should not be reached");
-      return;
+      unreachable("unknown shader stage");
    }
-
-   st_nir_assign_uniform_locations(prog, shader_program,
-                                   &nir->uniforms, &nir->num_uniforms);
-
-   NIR_PASS_V(nir, nir_lower_system_values);
-   NIR_PASS_V(nir, nir_lower_io, nir_var_all, st_glsl_type_size,
-              (nir_lower_io_options)0);
-   NIR_PASS_V(nir, nir_lower_samplers, shader_program);
 }
 
-struct gl_program *
+static void
 st_nir_get_mesa_program(struct gl_context *ctx,
                         struct gl_shader_program *shader_program,
                         struct gl_linked_shader *shader)
 {
+   struct st_context *st = st_context(ctx);
+   struct pipe_screen *pscreen = ctx->st->pipe->screen;
    struct gl_program *prog;
-   GLenum target = _mesa_shader_stage_to_program(shader->Stage);
 
    validate_ir_tree(shader->ir);
 
-   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
-   if (!prog)
-      return NULL;
-
-   _mesa_reference_program(ctx, &shader->Program, prog);
+   prog = shader->Program;
 
    prog->Parameters = _mesa_new_parameter_list();
 
-   do_set_program_inouts(shader->ir, prog, shader->Stage);
-
    _mesa_copy_linked_program_data(shader_program, shader);
-   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
+   _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
                                                prog->Parameters);
 
-   /* Make a pass over the IR to add state references for any built-in
-    * uniforms that are used.  This has to be done now (during linking).
-    * Code generation doesn't happen until the first time this shader is
-    * used for rendering.  Waiting until then to generate the parameters is
-    * too late.  At that point, the values for the built-in uniforms won't
-    * get sent to the shader.
-    */
-   foreach_in_list(ir_instruction, node, shader->ir) {
-      ir_variable *var = node->as_variable();
-
-      if ((var == NULL) || (var->data.mode != ir_var_uniform) ||
-          (strncmp(var->name, "gl_", 3) != 0))
-         continue;
-
-      const ir_state_slot *const slots = var->get_state_slots();
-      assert(slots != NULL);
-
-      for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
-         _mesa_add_state_reference(prog->Parameters,
-                                   (gl_state_index *) slots[i].tokens);
-      }
-   }
+   /* Remove reads from output registers. */
+   if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS))
+      lower_output_reads(shader->Stage, shader->ir);
 
    if (ctx->_Shader->Flags & GLSL_DUMP) {
       _mesa_log("\n");
@@ -420,44 +645,339 @@ st_nir_get_mesa_program(struct gl_context *ctx,
       _mesa_log("\n\n");
    }
 
-   prog->Instructions = NULL;
-   prog->NumInstructions = 0;
-
-   prog->SamplersUsed = shader->active_samplers;
-   prog->ShadowSamplers = shader->shadow_samplers;
-   prog->ExternalSamplersUsed = gl_external_samplers(shader);
+   prog->ExternalSamplersUsed = gl_external_samplers(prog);
    _mesa_update_shader_textures_used(shader_program, prog);
 
-   /* Avoid reallocation of the program parameter list, because the uniform
-    * storage is only associated with the original parameter list.
-    * This should be enough for Bitmap and DrawPixels constants.
-    */
-   _mesa_reserve_parameter_storage(prog->Parameters, 8);
+   nir_shader *nir = st_glsl_to_nir(st, prog, shader_program, shader->Stage);
 
-   /* This has to be done last.  Any operation the can cause
-    * prog->ParameterValues to get reallocated (e.g., anything that adds a
-    * program constant) has to happen before creating this linkage.
+   set_st_program(prog, shader_program, nir);
+   prog->nir = nir;
+}
+
+static void
+st_nir_link_shaders(nir_shader **producer, nir_shader **consumer, bool scalar)
+{
+   if (scalar) {
+      NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
+      NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
+   }
+
+   nir_lower_io_arrays_to_elements(*producer, *consumer);
+
+   st_nir_opts(*producer, scalar);
+   st_nir_opts(*consumer, scalar);
+
+   if (nir_link_opt_varyings(*producer, *consumer))
+      st_nir_opts(*consumer, scalar);
+
+   NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
+   NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);
+
+   if (nir_remove_unused_varyings(*producer, *consumer)) {
+      NIR_PASS_V(*producer, nir_lower_global_vars_to_local);
+      NIR_PASS_V(*consumer, nir_lower_global_vars_to_local);
+
+      /* The backend might not be able to handle indirects on
+       * temporaries so we need to lower indirects on any of the
+       * varyings we have demoted here.
+       *
+       * TODO: radeonsi shouldn't need to do this, however LLVM isn't
+       * currently smart enough to handle indirects without excessive
+       * spilling, which causes the GPU to hang.
+       *
+       * See the following thread for more details of the problem:
+       * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
+       */
+      nir_variable_mode indirect_mask = nir_var_function_temp;
+
+      NIR_PASS_V(*producer, nir_lower_indirect_derefs, indirect_mask);
+      NIR_PASS_V(*consumer, nir_lower_indirect_derefs, indirect_mask);
+
+      st_nir_opts(*producer, scalar);
+      st_nir_opts(*consumer, scalar);
+
+      /* Lowering indirects can cause varyings to become unused.
+       * nir_compact_varyings() depends on all dead varyings being removed so
+       * we need to call nir_remove_dead_variables() again here.
+       */
+      NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
+      NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);
+   }
+}
+
+static void
+st_lower_patch_vertices_in(struct gl_shader_program *shader_prog)
+{
+   struct gl_linked_shader *linked_tcs =
+      shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
+   struct gl_linked_shader *linked_tes =
+      shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
+
+   /* If we have a TCS and TES linked together, lower TES patch vertices. */
+   if (linked_tcs && linked_tes) {
+      nir_shader *tcs_nir = linked_tcs->Program->nir;
+      nir_shader *tes_nir = linked_tes->Program->nir;
+
+      /* The TES input vertex count is the TCS output vertex count, so
+       * lower TES gl_PatchVerticesIn to a constant.
+       */
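+      /* E.g. a TCS declaring "layout(vertices = 3) out" makes
+       * gl_PatchVerticesIn in the TES fold to the constant 3.
+       */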
+      uint32_t tes_patch_verts = tcs_nir->info.tess.tcs_vertices_out;
+      NIR_PASS_V(tes_nir, nir_lower_patch_vertices, tes_patch_verts, NULL);
+   }
+}
+
+extern "C" {
+
+void
+st_nir_lower_wpos_ytransform(struct nir_shader *nir,
+                             struct gl_program *prog,
+                             struct pipe_screen *pscreen)
+{
+   if (nir->info.stage != MESA_SHADER_FRAGMENT)
+      return;
+
+   static const gl_state_index16 wposTransformState[STATE_LENGTH] = {
+      STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
+   };
+   nir_lower_wpos_ytransform_options wpos_options = { { 0 } };
+
+   memcpy(wpos_options.state_tokens, wposTransformState,
+          sizeof(wpos_options.state_tokens));
+   wpos_options.fs_coord_origin_upper_left =
+      pscreen->get_param(pscreen,
+                         PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
+   wpos_options.fs_coord_origin_lower_left =
+      pscreen->get_param(pscreen,
+                         PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+   wpos_options.fs_coord_pixel_center_integer =
+      pscreen->get_param(pscreen,
+                         PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+   wpos_options.fs_coord_pixel_center_half_integer =
+      pscreen->get_param(pscreen,
+                         PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
+
+   if (nir_lower_wpos_ytransform(nir, &wpos_options)) {
+      nir_validate_shader(nir, "after nir_lower_wpos_ytransform");
+      _mesa_add_state_reference(prog->Parameters, wposTransformState);
+   }
+}
+
+bool
+st_link_nir(struct gl_context *ctx,
+            struct gl_shader_program *shader_program)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_screen *screen = st->pipe->screen;
+   bool is_scalar[MESA_SHADER_STAGES];
+
+   unsigned last_stage = 0;
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
+      if (shader == NULL)
+         continue;
+
+      /* Determine scalar property of each shader stage */
+      enum pipe_shader_type type = pipe_shader_type_from_mesa(shader->Stage);
+      is_scalar[i] = screen->get_shader_param(screen, type,
+                                              PIPE_SHADER_CAP_SCALAR_ISA);
+
+      st_nir_get_mesa_program(ctx, shader_program, shader);
+      last_stage = i;
+
+      if (is_scalar[i]) {
+         NIR_PASS_V(shader->Program->nir, nir_lower_load_const_to_scalar);
+      }
+   }
+
+   /* Linking the stages in the opposite order (from fragment to vertex)
+    * ensures that inter-shader outputs written to in an earlier stage
+    * are eliminated if they are (transitively) not used in a later
+    * stage.
     */
-   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
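+   /* For example, with VS+GS+FS linked, the loop below links GS against FS
+    * first and then VS against GS, so an output never read by the FS can be
+    * removed from the GS and then, transitively, from the VS.
+    */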
+   int next = last_stage;
+   for (int i = next - 1; i >= 0; i--) {
+      struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
+      if (shader == NULL)
+         continue;
 
-   struct st_vertex_program *stvp;
-   struct st_fragment_program *stfp;
+      st_nir_link_shaders(&shader->Program->nir,
+                          &shader_program->_LinkedShaders[next]->Program->nir,
+                          is_scalar[i]);
+      next = i;
+   }
 
-   switch (shader->Stage) {
-   case MESA_SHADER_VERTEX:
-      stvp = (struct st_vertex_program *)prog;
-      stvp->shader_program = shader_program;
-      break;
-   case MESA_SHADER_FRAGMENT:
-      stfp = (struct st_fragment_program *)prog;
-      stfp->shader_program = shader_program;
-      break;
-   default:
-      assert(!"should not be reached");
-      return NULL;
+   int prev = -1;
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
+      if (shader == NULL)
+         continue;
+
+      nir_shader *nir = shader->Program->nir;
+
+      NIR_PASS_V(nir, st_nir_lower_wpos_ytransform, shader->Program,
+                 st->pipe->screen);
+
+      NIR_PASS_V(nir, nir_lower_system_values);
+      NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
+
+      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+      shader->Program->info = nir->info;
+      if (i == MESA_SHADER_VERTEX) {
+         /* NIR expands dual-slot inputs out to two locations.  We need to
+          * compact things back down to GL-style single-slot inputs to avoid
+          * confusing the state tracker.
+          */
+         shader->Program->info.inputs_read =
+            nir_get_single_slot_attribs_mask(nir->info.inputs_read,
+                                             shader->Program->DualSlotInputs);
+      }
+
+      if (prev != -1) {
+         struct gl_program *prev_shader =
+            shader_program->_LinkedShaders[prev]->Program;
+
+         /* We can't use nir_compact_varyings with transform feedback, since
+          * the pipe_stream_output->output_register field is based on the
+          * pre-compacted driver_locations.
+          */
+         if (!(prev_shader->sh.LinkedTransformFeedback &&
+               prev_shader->sh.LinkedTransformFeedback->NumVarying > 0))
+            nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir,
+                                 nir, ctx->API != API_OPENGL_COMPAT);
+      }
+      prev = i;
+   }
+
+   st_lower_patch_vertices_in(shader_program);
+
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
+      if (shader == NULL)
+         continue;
+
+      st_glsl_to_nir_post_opts(st, shader->Program, shader_program);
+
+      assert(shader->Program);
+      if (!ctx->Driver.ProgramStringNotify(ctx,
+                                           _mesa_shader_stage_to_program(i),
+                                           shader->Program)) {
+         _mesa_reference_program(ctx, &shader->Program, NULL);
+         return false;
+      }
+
+      nir_sweep(shader->Program->nir);
+
+      /* The GLSL IR won't be needed anymore. */
+      ralloc_free(shader->ir);
+      shader->ir = NULL;
+   }
+
+   return true;
+}
+
+void
+st_nir_assign_varying_locations(struct st_context *st, nir_shader *nir)
+{
+   if (nir->info.stage == MESA_SHADER_VERTEX) {
+      /* Needs special handling so drvloc matches the vbo state: */
+      st_nir_assign_vs_in_locations(nir);
+      /* Re-lower global vars, to deal with any dead VS inputs. */
+      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
+
+      sort_varyings(&nir->outputs);
+      st_nir_assign_var_locations(&nir->outputs,
+                                  &nir->num_outputs,
+                                  nir->info.stage);
+      st_nir_fixup_varying_slots(st, &nir->outputs);
+   } else if (nir->info.stage == MESA_SHADER_GEOMETRY ||
+              nir->info.stage == MESA_SHADER_TESS_CTRL ||
+              nir->info.stage == MESA_SHADER_TESS_EVAL) {
+      sort_varyings(&nir->inputs);
+      st_nir_assign_var_locations(&nir->inputs,
+                                  &nir->num_inputs,
+                                  nir->info.stage);
+      st_nir_fixup_varying_slots(st, &nir->inputs);
+
+      sort_varyings(&nir->outputs);
+      st_nir_assign_var_locations(&nir->outputs,
+                                  &nir->num_outputs,
+                                  nir->info.stage);
+      st_nir_fixup_varying_slots(st, &nir->outputs);
+   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      sort_varyings(&nir->inputs);
+      st_nir_assign_var_locations(&nir->inputs,
+                                  &nir->num_inputs,
+                                  nir->info.stage);
+      st_nir_fixup_varying_slots(st, &nir->inputs);
+      st_nir_assign_var_locations(&nir->outputs,
+                                  &nir->num_outputs,
+                                  nir->info.stage);
+   } else if (nir->info.stage == MESA_SHADER_COMPUTE) {
+      /* TODO? */
+   } else {
+      unreachable("invalid shader type");
+   }
+}
+
+void
+st_nir_lower_samplers(struct pipe_screen *screen, nir_shader *nir,
+                      struct gl_shader_program *shader_program,
+                      struct gl_program *prog)
+{
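+   /* Drivers advertising PIPE_CAP_NIR_SAMPLERS_AS_DEREF keep sampler derefs
+    * in the NIR (rewritten to point at the linked bindings); for the rest,
+    * the derefs are flattened into plain texture/sampler indices.
+    */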
+   if (screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF))
+      NIR_PASS_V(nir, gl_nir_lower_samplers_as_deref, shader_program);
+   else
+      NIR_PASS_V(nir, gl_nir_lower_samplers, shader_program);
+
+   if (prog) {
+      prog->info.textures_used = nir->info.textures_used;
+      prog->info.textures_used_by_txf = nir->info.textures_used_by_txf;
+   }
+}
+
+/* Last third of preparing nir from glsl, which happens after shader
+ * variant lowering.
+ */
+void
+st_finalize_nir(struct st_context *st, struct gl_program *prog,
+                struct gl_shader_program *shader_program, nir_shader *nir)
+{
+   struct pipe_screen *screen = st->pipe->screen;
+   const nir_shader_compiler_options *options =
+      st->ctx->Const.ShaderCompilerOptions[prog->info.stage].NirOptions;
+
+   NIR_PASS_V(nir, nir_split_var_copies);
+   NIR_PASS_V(nir, nir_lower_var_copies);
+   if (options->lower_all_io_to_temps ||
+       options->lower_all_io_to_elements ||
+       nir->info.stage == MESA_SHADER_VERTEX ||
+       nir->info.stage == MESA_SHADER_GEOMETRY) {
+      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
+   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
+   }
+
+   st_nir_assign_varying_locations(st, nir);
+
+   NIR_PASS_V(nir, nir_lower_atomics_to_ssbo,
+         st->ctx->Const.Program[nir->info.stage].MaxAtomicBuffers);
+
+   st_nir_assign_uniform_locations(st->ctx, prog,
+                                   &nir->uniforms);
+
+   /* Set num_uniforms in number of attribute slots (vec4s) */
+   nir->num_uniforms = DIV_ROUND_UP(prog->Parameters->NumParameterValues, 4);
+
+   if (st->ctx->Const.PackedDriverUniformStorage) {
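+      /* Uniform offsets are sized in dwords here (st_glsl_type_dword_size),
+       * so the multiplier of 4 converts them to bytes when
+       * nir_lower_uniforms_to_ubo rewrites default-block uniform access as
+       * loads from constant buffer 0.
+       */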
+      NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_glsl_type_dword_size,
+                 (nir_lower_io_options)0);
+      NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, 4);
+   } else {
+      NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, st_glsl_uniforms_type_size,
+                 (nir_lower_io_options)0);
    }
 
-   return prog;
+   st_nir_lower_samplers(screen, nir, shader_program, prog);
 }
 
 } /* extern "C" */