glsl_to_nir: fix bitfield_extract with 16-bit operands
[mesa.git] / src / compiler / glsl / glsl_to_nir.cpp
index 9bb0f5d4044cc633ea9bcb346bad8dd77a10610b..2dc0df32a93ddb183652d23d3c855aba40dd335c 100644 (file)
  *
  */
 
+#include "float64_glsl.h"
 #include "glsl_to_nir.h"
 #include "ir_visitor.h"
 #include "ir_hierarchical_visitor.h"
 #include "ir.h"
+#include "ir_optimization.h"
+#include "program.h"
 #include "compiler/nir/nir_control_flow.h"
 #include "compiler/nir/nir_builder.h"
-#include "main/imports.h"
+#include "compiler/nir/nir_builtin_builder.h"
+#include "compiler/nir/nir_deref.h"
+#include "main/errors.h"
 #include "main/mtypes.h"
+#include "main/shaderobj.h"
+#include "util/u_math.h"
 
 /*
  * pass to lower GLSL IR to NIR
@@ -47,7 +54,7 @@ namespace {
 class nir_visitor : public ir_visitor
 {
 public:
-   nir_visitor(nir_shader *shader);
+   nir_visitor(gl_context *ctx, nir_shader *shader);
    ~nir_visitor();
 
    virtual void visit(ir_variable *);
@@ -56,6 +63,7 @@ public:
    virtual void visit(ir_loop *);
    virtual void visit(ir_if *);
    virtual void visit(ir_discard *);
+   virtual void visit(ir_demote *);
    virtual void visit(ir_loop_jump *);
    virtual void visit(ir_return *);
    virtual void visit(ir_call *);
@@ -84,7 +92,7 @@ private:
    nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
                        nir_ssa_def *src2, nir_ssa_def *src3);
 
-   bool supports_ints;
+   bool supports_std430;
 
    nir_shader *shader;
    nir_function_impl *impl;
@@ -93,14 +101,16 @@ private:
 
    nir_deref_instr *evaluate_deref(ir_instruction *ir);
 
+   nir_constant *constant_copy(ir_constant *ir, void *mem_ctx);
+
    /* most recent deref instruction created */
    nir_deref_instr *deref;
 
-   nir_variable *var; /* variable created by ir_variable visitor */
-
    /* whether the IR we're operating on is per-function or global */
    bool is_global;
 
+   ir_function_signature *sig;
+
    /* map of ir_variable -> nir_variable */
    struct hash_table *var_table;
 
@@ -126,31 +136,109 @@ private:
    nir_visitor *visitor;
 };
 
+/* glsl_to_nir can only handle converting certain function paramaters
+ * to NIR. This visitor checks for parameters it can't currently handle.
+ */
+class ir_function_param_visitor : public ir_hierarchical_visitor
+{
+public:
+   ir_function_param_visitor()
+      : unsupported(false)
+   {
+   }
+
+   virtual ir_visitor_status visit_enter(ir_function_signature *ir)
+   {
+
+      if (ir->is_intrinsic())
+         return visit_continue;
+
+      foreach_in_list(ir_variable, param, &ir->parameters) {
+         if (!param->type->is_vector() || !param->type->is_scalar()) {
+            unsupported = true;
+            return visit_stop;
+         }
+
+         if (param->data.mode == ir_var_function_inout) {
+            unsupported = true;
+            return visit_stop;
+         }
+      }
+
+      if (!glsl_type_is_vector_or_scalar(ir->return_type) &&
+          !ir->return_type->is_void()) {
+         unsupported = true;
+         return visit_stop;
+      }
+
+      return visit_continue;
+   }
+
+   bool unsupported;
+};
+
 } /* end of anonymous namespace */
 
+
+static bool
+has_unsupported_function_param(exec_list *ir)
+{
+   ir_function_param_visitor visitor;
+   visit_list_elements(&visitor, ir);
+   return visitor.unsupported;
+}
+
 nir_shader *
-glsl_to_nir(const struct gl_shader_program *shader_prog,
+glsl_to_nir(struct gl_context *ctx,
+            const struct gl_shader_program *shader_prog,
             gl_shader_stage stage,
             const nir_shader_compiler_options *options)
 {
    struct gl_linked_shader *sh = shader_prog->_LinkedShaders[stage];
 
+   const struct gl_shader_compiler_options *gl_options =
+      &ctx->Const.ShaderCompilerOptions[stage];
+
+   /* glsl_to_nir can only handle converting certain function paramaters
+    * to NIR. If we find something we can't handle then we get the GLSL IR
+    * opts to remove it before we continue on.
+    *
+    * TODO: add missing glsl ir to nir support and remove this loop.
+    */
+   while (has_unsupported_function_param(sh->ir)) {
+      do_common_optimization(sh->ir, true, true, gl_options,
+                             ctx->Const.NativeIntegers);
+   }
+
    nir_shader *shader = nir_shader_create(NULL, stage, options,
                                           &sh->Program->info);
 
-   nir_visitor v1(shader);
+   nir_visitor v1(ctx, shader);
    nir_function_visitor v2(&v1);
    v2.run(sh->ir);
    visit_exec_list(sh->ir, &v1);
 
-   nir_lower_constant_initializers(shader, (nir_variable_mode)~0);
+   nir_validate_shader(shader, "after glsl to nir, before function inline");
+
+   /* We have to lower away local constant initializers right before we
+    * inline functions.  That way they get properly initialized at the top
+    * of the function and not at the top of its caller.
+    */
+   nir_lower_variable_initializers(shader, (nir_variable_mode)~0);
+   nir_lower_returns(shader);
+   nir_inline_functions(shader);
+   nir_opt_deref(shader);
+
+   nir_validate_shader(shader, "after function inlining and return lowering");
 
-   /* Remap the locations to slots so those requiring two slots will occupy
-    * two locations. For instance, if we have in the IR code a dvec3 attr0 in
-    * location 0 and vec4 attr1 in location 1, in NIR attr0 will use
-    * locations/slots 0 and 1, and attr1 will use location/slot 2 */
-   if (shader->info.stage == MESA_SHADER_VERTEX)
-      nir_remap_dual_slot_attributes(shader, &sh->Program->DualSlotInputs);
+   /* Now that we have inlined everything remove all of the functions except
+    * main().
+    */
+   foreach_list_typed_safe(nir_function, function, node, &(shader)->functions){
+      if (strcmp("main", function->name) != 0) {
+         exec_node_remove(&function->node);
+      }
+   }
 
    shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
    if (shader_prog->Label)
@@ -165,21 +253,25 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
       shader->info.has_transform_feedback_varyings |=
          shader_prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;
 
+   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
+      shader->info.fs.pixel_center_integer = sh->Program->info.fs.pixel_center_integer;
+      shader->info.fs.origin_upper_left = sh->Program->info.fs.origin_upper_left;
+   }
+
    return shader;
 }
 
-nir_visitor::nir_visitor(nir_shader *shader)
+nir_visitor::nir_visitor(gl_context *ctx, nir_shader *shader)
 {
-   this->supports_ints = shader->options->native_integers;
+   this->supports_std430 = ctx->Const.UseSTD430AsDefaultPacking;
    this->shader = shader;
    this->is_global = true;
-   this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-                                             _mesa_key_pointer_equal);
-   this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-                                                  _mesa_key_pointer_equal);
+   this->var_table = _mesa_pointer_hash_table_create(NULL);
+   this->overload_table = _mesa_pointer_hash_table_create(NULL);
    this->result = NULL;
    this->impl = NULL;
-   this->var = NULL;
+   this->deref = NULL;
+   this->sig = NULL;
    memset(&this->b, 0, sizeof(this->b));
 }
 
@@ -196,8 +288,8 @@ nir_visitor::evaluate_deref(ir_instruction *ir)
    return this->deref;
 }
 
-static nir_constant *
-constant_copy(ir_constant *ir, void *mem_ctx)
+nir_constant *
+nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)
 {
    if (ir == NULL)
       return NULL;
@@ -215,8 +307,16 @@ constant_copy(ir_constant *ir, void *mem_ctx)
       assert(cols == 1);
 
       for (unsigned r = 0; r < rows; r++)
-         ret->values[0].u32[r] = ir->value.u[r];
+         ret->values[r].u32 = ir->value.u[r];
+
+      break;
 
+   case GLSL_TYPE_UINT16:
+      /* Only float base types can be matrices. */
+      assert(cols == 1);
+
+      for (unsigned r = 0; r < rows; r++)
+         ret->values[r].u16 = ir->value.u16[r];
       break;
 
    case GLSL_TYPE_INT:
@@ -224,21 +324,68 @@ constant_copy(ir_constant *ir, void *mem_ctx)
       assert(cols == 1);
 
       for (unsigned r = 0; r < rows; r++)
-         ret->values[0].i32[r] = ir->value.i[r];
+         ret->values[r].i32 = ir->value.i[r];
 
       break;
 
-   case GLSL_TYPE_FLOAT:
-      for (unsigned c = 0; c < cols; c++) {
-         for (unsigned r = 0; r < rows; r++)
-            ret->values[c].f32[r] = ir->value.f[c * rows + r];
-      }
+   case GLSL_TYPE_INT16:
+      /* Only float base types can be matrices. */
+      assert(cols == 1);
+
+      for (unsigned r = 0; r < rows; r++)
+         ret->values[r].i16 = ir->value.i16[r];
       break;
 
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_FLOAT16:
    case GLSL_TYPE_DOUBLE:
-      for (unsigned c = 0; c < cols; c++) {
-         for (unsigned r = 0; r < rows; r++)
-            ret->values[c].f64[r] = ir->value.d[c * rows + r];
+      if (cols > 1) {
+         ret->elements = ralloc_array(mem_ctx, nir_constant *, cols);
+         ret->num_elements = cols;
+         for (unsigned c = 0; c < cols; c++) {
+            nir_constant *col_const = rzalloc(mem_ctx, nir_constant);
+            col_const->num_elements = 0;
+            switch (ir->type->base_type) {
+            case GLSL_TYPE_FLOAT:
+               for (unsigned r = 0; r < rows; r++)
+                  col_const->values[r].f32 = ir->value.f[c * rows + r];
+               break;
+
+            case GLSL_TYPE_FLOAT16:
+               for (unsigned r = 0; r < rows; r++)
+                  col_const->values[r].u16 = ir->value.f16[c * rows + r];
+               break;
+
+            case GLSL_TYPE_DOUBLE:
+               for (unsigned r = 0; r < rows; r++)
+                  col_const->values[r].f64 = ir->value.d[c * rows + r];
+               break;
+
+            default:
+               unreachable("Cannot get here from the first level switch");
+            }
+            ret->elements[c] = col_const;
+         }
+      } else {
+         switch (ir->type->base_type) {
+         case GLSL_TYPE_FLOAT:
+            for (unsigned r = 0; r < rows; r++)
+               ret->values[r].f32 = ir->value.f[r];
+            break;
+
+         case GLSL_TYPE_FLOAT16:
+            for (unsigned r = 0; r < rows; r++)
+               ret->values[r].u16 = ir->value.f16[r];
+            break;
+
+         case GLSL_TYPE_DOUBLE:
+            for (unsigned r = 0; r < rows; r++)
+               ret->values[r].f64 = ir->value.d[r];
+            break;
+
+         default:
+            unreachable("Cannot get here from the first level switch");
+         }
       }
       break;
 
@@ -247,7 +394,7 @@ constant_copy(ir_constant *ir, void *mem_ctx)
       assert(cols == 1);
 
       for (unsigned r = 0; r < rows; r++)
-         ret->values[0].u64[r] = ir->value.u64[r];
+         ret->values[r].u64 = ir->value.u64[r];
       break;
 
    case GLSL_TYPE_INT64:
@@ -255,7 +402,7 @@ constant_copy(ir_constant *ir, void *mem_ctx)
       assert(cols == 1);
 
       for (unsigned r = 0; r < rows; r++)
-         ret->values[0].i64[r] = ir->value.i64[r];
+         ret->values[r].i64 = ir->value.i64[r];
       break;
 
    case GLSL_TYPE_BOOL:
@@ -263,7 +410,7 @@ constant_copy(ir_constant *ir, void *mem_ctx)
       assert(cols == 1);
 
       for (unsigned r = 0; r < rows; r++)
-         ret->values[0].u32[r] = ir->value.b[r] ? NIR_TRUE : NIR_FALSE;
+         ret->values[r].b = ir->value.b[r];
 
       break;
 
@@ -284,6 +431,26 @@ constant_copy(ir_constant *ir, void *mem_ctx)
    return ret;
 }
 
+static const glsl_type *
+wrap_type_in_array(const glsl_type *elem_type, const glsl_type *array_type)
+{
+   if (!array_type->is_array())
+      return elem_type;
+
+   elem_type = wrap_type_in_array(elem_type, array_type->fields.array);
+
+   return glsl_type::get_array_instance(elem_type, array_type->length);
+}
+
+static unsigned
+get_nir_how_declared(unsigned how_declared)
+{
+   if (how_declared == ir_var_hidden)
+      return nir_var_hidden;
+
+   return nir_var_declared_normally;
+}
+
 void
 nir_visitor::visit(ir_variable *ir)
 {
@@ -294,6 +461,12 @@ nir_visitor::visit(ir_variable *ir)
    if (ir->data.mode == ir_var_shader_shared)
       return;
 
+   /* FINISHME: inout parameters */
+   assert(ir->data.mode != ir_var_function_inout);
+
+   if (ir->data.mode == ir_var_function_out)
+      return;
+
    nir_variable *var = rzalloc(shader, nir_variable);
    var->type = ir->type;
    var->name = ralloc_strdup(var, ir->name);
@@ -303,35 +476,36 @@ nir_visitor::visit(ir_variable *ir)
    var->data.centroid = ir->data.centroid;
    var->data.sample = ir->data.sample;
    var->data.patch = ir->data.patch;
+   var->data.how_declared = get_nir_how_declared(ir->data.how_declared);
    var->data.invariant = ir->data.invariant;
    var->data.location = ir->data.location;
    var->data.stream = ir->data.stream;
+   if (ir->data.stream & (1u << 31))
+      var->data.stream |= NIR_STREAM_PACKED;
+
+   var->data.precision = ir->data.precision;
+   var->data.explicit_location = ir->data.explicit_location;
+   var->data.matrix_layout = ir->data.matrix_layout;
+   var->data.from_named_ifc_block = ir->data.from_named_ifc_block;
    var->data.compact = false;
 
    switch(ir->data.mode) {
    case ir_var_auto:
    case ir_var_temporary:
       if (is_global)
-         var->data.mode = nir_var_global;
+         var->data.mode = nir_var_shader_temp;
       else
-         var->data.mode = nir_var_local;
+         var->data.mode = nir_var_function_temp;
       break;
 
    case ir_var_function_in:
-   case ir_var_function_out:
-   case ir_var_function_inout:
    case ir_var_const_in:
-      var->data.mode = nir_var_local;
+      var->data.mode = nir_var_function_temp;
       break;
 
    case ir_var_shader_in:
-      if (shader->info.stage == MESA_SHADER_FRAGMENT &&
-          ir->data.location == VARYING_SLOT_FACE) {
-         /* For whatever reason, GLSL IR makes gl_FrontFacing an input */
-         var->data.location = SYSTEM_VALUE_FRONT_FACE;
-         var->data.mode = nir_var_system_value;
-      } else if (shader->info.stage == MESA_SHADER_GEOMETRY &&
-                 ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
+      if (shader->info.stage == MESA_SHADER_GEOMETRY &&
+          ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
          /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
          var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
          var->data.mode = nir_var_system_value;
@@ -343,6 +517,12 @@ nir_visitor::visit(ir_variable *ir)
               ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
             var->data.compact = ir->type->without_array()->is_scalar();
          }
+
+         if (shader->info.stage > MESA_SHADER_VERTEX &&
+             ir->data.location >= VARYING_SLOT_CLIP_DIST0 &&
+             ir->data.location <= VARYING_SLOT_CULL_DIST1) {
+            var->data.compact = ir->type->without_array()->is_scalar();
+         }
       }
       break;
 
@@ -353,14 +533,23 @@ nir_visitor::visit(ir_variable *ir)
            ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
          var->data.compact = ir->type->without_array()->is_scalar();
       }
+
+      if (shader->info.stage <= MESA_SHADER_GEOMETRY &&
+          ir->data.location >= VARYING_SLOT_CLIP_DIST0 &&
+          ir->data.location <= VARYING_SLOT_CULL_DIST1) {
+         var->data.compact = ir->type->without_array()->is_scalar();
+      }
       break;
 
    case ir_var_uniform:
-      var->data.mode = nir_var_uniform;
+      if (ir->get_interface_type())
+         var->data.mode = nir_var_mem_ubo;
+      else
+         var->data.mode = nir_var_uniform;
       break;
 
    case ir_var_shader_storage:
-      var->data.mode = nir_var_shader_storage;
+      var->data.mode = nir_var_mem_ssbo;
       break;
 
    case ir_var_system_value:
@@ -371,16 +560,63 @@ nir_visitor::visit(ir_variable *ir)
       unreachable("not reached");
    }
 
-   var->data.interpolation = ir->data.interpolation;
-   var->data.origin_upper_left = ir->data.origin_upper_left;
-   var->data.pixel_center_integer = ir->data.pixel_center_integer;
-   var->data.location_frac = ir->data.location_frac;
+   unsigned mem_access = 0;
+   if (ir->data.memory_read_only)
+      mem_access |= ACCESS_NON_WRITEABLE;
+   if (ir->data.memory_write_only)
+      mem_access |= ACCESS_NON_READABLE;
+   if (ir->data.memory_coherent)
+      mem_access |= ACCESS_COHERENT;
+   if (ir->data.memory_volatile)
+      mem_access |= ACCESS_VOLATILE;
+   if (ir->data.memory_restrict)
+      mem_access |= ACCESS_RESTRICT;
+
+   var->interface_type = ir->get_interface_type();
+
+   /* For UBO and SSBO variables, we need explicit types */
+   if (var->data.mode & (nir_var_mem_ubo | nir_var_mem_ssbo)) {
+      const glsl_type *explicit_ifc_type =
+         ir->get_interface_type()->get_explicit_interface_type(supports_std430);
+
+      var->interface_type = explicit_ifc_type;
 
-   if (var->data.pixel_center_integer) {
-      assert(shader->info.stage == MESA_SHADER_FRAGMENT);
-      shader->info.fs.pixel_center_integer = true;
+      if (ir->type->without_array()->is_interface()) {
+         /* If the type contains the interface, wrap the explicit type in the
+          * right number of arrays.
+          */
+         var->type = wrap_type_in_array(explicit_ifc_type, ir->type);
+      } else {
+         /* Otherwise, this variable is one entry in the interface */
+         UNUSED bool found = false;
+         for (unsigned i = 0; i < explicit_ifc_type->length; i++) {
+            const glsl_struct_field *field =
+               &explicit_ifc_type->fields.structure[i];
+            if (strcmp(ir->name, field->name) != 0)
+               continue;
+
+            var->type = field->type;
+            if (field->memory_read_only)
+               mem_access |= ACCESS_NON_WRITEABLE;
+            if (field->memory_write_only)
+               mem_access |= ACCESS_NON_READABLE;
+            if (field->memory_coherent)
+               mem_access |= ACCESS_COHERENT;
+            if (field->memory_volatile)
+               mem_access |= ACCESS_VOLATILE;
+            if (field->memory_restrict)
+               mem_access |= ACCESS_RESTRICT;
+
+            found = true;
+            break;
+         }
+         assert(found);
+      }
    }
 
+   var->data.interpolation = ir->data.interpolation;
+   var->data.location_frac = ir->data.location_frac;
+
    switch (ir->data.depth_layout) {
    case ir_depth_layout_none:
       var->data.depth_layout = nir_depth_layout_none;
@@ -407,26 +643,18 @@ nir_visitor::visit(ir_variable *ir)
    var->data.explicit_binding = ir->data.explicit_binding;
    var->data.bindless = ir->data.bindless;
    var->data.offset = ir->data.offset;
+   var->data.access = (gl_access_qualifier)mem_access;
 
-   unsigned image_access = 0;
-   if (ir->data.memory_read_only)
-      image_access |= ACCESS_NON_WRITEABLE;
-   if (ir->data.memory_write_only)
-      image_access |= ACCESS_NON_READABLE;
-   if (ir->data.memory_coherent)
-      image_access |= ACCESS_COHERENT;
-   if (ir->data.memory_volatile)
-      image_access |= ACCESS_VOLATILE;
-   if (ir->data.memory_restrict)
-      image_access |= ACCESS_RESTRICT;
-   var->data.image.access = (gl_access_qualifier)image_access;
-   var->data.image.format = ir->data.image_format;
+   if (var->type->without_array()->is_image()) {
+      var->data.image.format = ir->data.image_format;
+   } else if (var->data.mode == nir_var_shader_out) {
+      var->data.xfb.buffer = ir->data.xfb_buffer;
+      var->data.xfb.stride = ir->data.xfb_stride;
+   }
 
    var->data.fb_fetch_output = ir->data.fb_fetch_output;
    var->data.explicit_xfb_buffer = ir->data.explicit_xfb_buffer;
    var->data.explicit_xfb_stride = ir->data.explicit_xfb_stride;
-   var->data.xfb_buffer = ir->data.xfb_buffer;
-   var->data.xfb_stride = ir->data.xfb_stride;
 
    var->num_state_slots = ir->get_num_state_slots();
    if (var->num_state_slots > 0) {
@@ -445,15 +673,12 @@ nir_visitor::visit(ir_variable *ir)
 
    var->constant_initializer = constant_copy(ir->constant_initializer, var);
 
-   var->interface_type = ir->get_interface_type();
-
-   if (var->data.mode == nir_var_local)
+   if (var->data.mode == nir_var_function_temp)
       nir_function_impl_add_variable(impl, var);
    else
       nir_shader_add_variable(shader, var);
 
    _mesa_hash_table_insert(var_table, ir, var);
-   this->var = var;
 }
 
 ir_visitor_status
@@ -472,9 +697,36 @@ nir_visitor::create_function(ir_function_signature *ir)
       return;
 
    nir_function *func = nir_function_create(shader, ir->function_name());
+   if (strcmp(ir->function_name(), "main") == 0)
+      func->is_entrypoint = true;
+
+   func->num_params = ir->parameters.length() +
+                      (ir->return_type != glsl_type::void_type);
+   func->params = ralloc_array(shader, nir_parameter, func->num_params);
+
+   unsigned np = 0;
+
+   if (ir->return_type != glsl_type::void_type) {
+      /* The return value is a variable deref (basically an out parameter) */
+      func->params[np].num_components = 1;
+      func->params[np].bit_size = 32;
+      np++;
+   }
 
-   assert(ir->parameters.is_empty());
-   assert(ir->return_type == glsl_type::void_type);
+   foreach_in_list(ir_variable, param, &ir->parameters) {
+      /* FINISHME: pass arrays, structs, etc by reference? */
+      assert(param->type->is_vector() || param->type->is_scalar());
+
+      if (param->data.mode == ir_var_function_in) {
+         func->params[np].num_components = param->type->vector_elements;
+         func->params[np].bit_size = glsl_get_bit_size(param->type);
+      } else {
+         func->params[np].num_components = 1;
+         func->params[np].bit_size = 32;
+      }
+      np++;
+   }
+   assert(np == func->num_params);
 
    _mesa_hash_table_insert(this->overload_table, ir, func);
 }
@@ -492,6 +744,8 @@ nir_visitor::visit(ir_function_signature *ir)
    if (ir->is_intrinsic())
       return;
 
+   this->sig = ir;
+
    struct hash_entry *entry =
       _mesa_hash_table_search(this->overload_table, ir);
 
@@ -502,13 +756,25 @@ nir_visitor::visit(ir_function_signature *ir)
       nir_function_impl *impl = nir_function_impl_create(func);
       this->impl = impl;
 
-      assert(strcmp(func->name, "main") == 0);
-      assert(ir->parameters.is_empty());
-
       this->is_global = false;
 
       nir_builder_init(&b, impl);
       b.cursor = nir_after_cf_list(&impl->body);
+
+      unsigned i = (ir->return_type != glsl_type::void_type) ? 1 : 0;
+
+      foreach_in_list(ir_variable, param, &ir->parameters) {
+         nir_variable *var =
+            nir_local_variable_create(impl, param->type, param->name);
+
+         if (param->data.mode == ir_var_function_in) {
+            nir_store_var(&b, var, nir_load_param(&b, i), ~0);
+         }
+
+         _mesa_hash_table_insert(var_table, param, var);
+         i++;
+      }
+
       visit_exec_list(&ir->body, this);
 
       this->is_global = true;
@@ -558,6 +824,15 @@ nir_visitor::visit(ir_discard *ir)
    nir_builder_instr_insert(&b, &discard->instr);
 }
 
+void
+nir_visitor::visit(ir_demote *ir)
+{
+   nir_intrinsic_instr *demote =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_demote);
+
+   nir_builder_instr_insert(&b, &demote->instr);
+}
+
 void
 nir_visitor::visit(ir_emit_vertex *ir)
 {
@@ -598,11 +873,66 @@ nir_visitor::visit(ir_loop_jump *ir)
 void
 nir_visitor::visit(ir_return *ir)
 {
-   assert(ir->value == NULL);
+   if (ir->value != NULL) {
+      nir_deref_instr *ret_deref =
+         nir_build_deref_cast(&b, nir_load_param(&b, 0),
+                              nir_var_function_temp, ir->value->type, 0);
+
+      nir_ssa_def *val = evaluate_rvalue(ir->value);
+      nir_store_deref(&b, ret_deref, val, ~0);
+   }
+
    nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
    nir_builder_instr_insert(&b, &instr->instr);
 }
 
+static void
+intrinsic_set_std430_align(nir_intrinsic_instr *intrin, const glsl_type *type)
+{
+   unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type);
+   unsigned pow2_components = util_next_power_of_two(type->vector_elements);
+   nir_intrinsic_set_align(intrin, (bit_size / 8) * pow2_components, 0);
+}
+
+/* Accumulate any qualifiers along the deref chain to get the actual
+ * load/store qualifier.
+ */
+
+static enum gl_access_qualifier
+deref_get_qualifier(nir_deref_instr *deref)
+{
+   nir_deref_path path;
+   nir_deref_path_init(&path, deref, NULL);
+
+   unsigned qualifiers = path.path[0]->var->data.access;
+
+   const glsl_type *parent_type = path.path[0]->type;
+   for (nir_deref_instr **cur_ptr = &path.path[1]; *cur_ptr; cur_ptr++) {
+      nir_deref_instr *cur = *cur_ptr;
+
+      if (parent_type->is_interface()) {
+         const struct glsl_struct_field *field =
+            &parent_type->fields.structure[cur->strct.index];
+         if (field->memory_read_only)
+            qualifiers |= ACCESS_NON_WRITEABLE;
+         if (field->memory_write_only)
+            qualifiers |= ACCESS_NON_READABLE;
+         if (field->memory_coherent)
+            qualifiers |= ACCESS_COHERENT;
+         if (field->memory_volatile)
+            qualifiers |= ACCESS_VOLATILE;
+         if (field->memory_restrict)
+            qualifiers |= ACCESS_RESTRICT;
+      }
+
+      parent_type = cur->type;
+   }
+
+   nir_deref_path_finish(&path);
+
+   return (gl_access_qualifier) qualifiers;
+}
+
 void
 nir_visitor::visit(ir_call *ir)
 {
@@ -610,6 +940,49 @@ nir_visitor::visit(ir_call *ir)
       nir_intrinsic_op op;
 
       switch (ir->callee->intrinsic_id) {
+      case ir_intrinsic_generic_atomic_add:
+         op = ir->return_deref->type->is_integer_32_64()
+            ? nir_intrinsic_deref_atomic_add : nir_intrinsic_deref_atomic_fadd;
+         break;
+      case ir_intrinsic_generic_atomic_and:
+         op = nir_intrinsic_deref_atomic_and;
+         break;
+      case ir_intrinsic_generic_atomic_or:
+         op = nir_intrinsic_deref_atomic_or;
+         break;
+      case ir_intrinsic_generic_atomic_xor:
+         op = nir_intrinsic_deref_atomic_xor;
+         break;
+      case ir_intrinsic_generic_atomic_min:
+         assert(ir->return_deref);
+         if (ir->return_deref->type == glsl_type::int_type)
+            op = nir_intrinsic_deref_atomic_imin;
+         else if (ir->return_deref->type == glsl_type::uint_type)
+            op = nir_intrinsic_deref_atomic_umin;
+         else if (ir->return_deref->type == glsl_type::float_type)
+            op = nir_intrinsic_deref_atomic_fmin;
+         else
+            unreachable("Invalid type");
+         break;
+      case ir_intrinsic_generic_atomic_max:
+         assert(ir->return_deref);
+         if (ir->return_deref->type == glsl_type::int_type)
+            op = nir_intrinsic_deref_atomic_imax;
+         else if (ir->return_deref->type == glsl_type::uint_type)
+            op = nir_intrinsic_deref_atomic_umax;
+         else if (ir->return_deref->type == glsl_type::float_type)
+            op = nir_intrinsic_deref_atomic_fmax;
+         else
+            unreachable("Invalid type");
+         break;
+      case ir_intrinsic_generic_atomic_exchange:
+         op = nir_intrinsic_deref_atomic_exchange;
+         break;
+      case ir_intrinsic_generic_atomic_comp_swap:
+         op = ir->return_deref->type->is_integer_32_64()
+            ? nir_intrinsic_deref_atomic_comp_swap
+            : nir_intrinsic_deref_atomic_fcomp_swap;
+         break;
       case ir_intrinsic_atomic_counter_read:
          op = nir_intrinsic_atomic_counter_read_deref;
          break;
@@ -655,10 +1028,20 @@ nir_visitor::visit(ir_call *ir)
             : nir_intrinsic_image_deref_atomic_fadd;
          break;
       case ir_intrinsic_image_atomic_min:
-         op = nir_intrinsic_image_deref_atomic_min;
+         if (ir->return_deref->type == glsl_type::int_type)
+            op = nir_intrinsic_image_deref_atomic_imin;
+         else if (ir->return_deref->type == glsl_type::uint_type)
+            op = nir_intrinsic_image_deref_atomic_umin;
+         else
+            unreachable("Invalid type");
          break;
       case ir_intrinsic_image_atomic_max:
-         op = nir_intrinsic_image_deref_atomic_max;
+         if (ir->return_deref->type == glsl_type::int_type)
+            op = nir_intrinsic_image_deref_atomic_imax;
+         else if (ir->return_deref->type == glsl_type::uint_type)
+            op = nir_intrinsic_image_deref_atomic_umax;
+         else
+            unreachable("Invalid type");
          break;
       case ir_intrinsic_image_atomic_and:
          op = nir_intrinsic_image_deref_atomic_and;
@@ -675,6 +1058,12 @@ nir_visitor::visit(ir_call *ir)
       case ir_intrinsic_image_atomic_comp_swap:
          op = nir_intrinsic_image_deref_atomic_comp_swap;
          break;
+      case ir_intrinsic_image_atomic_inc_wrap:
+         op = nir_intrinsic_image_deref_atomic_inc_wrap;
+         break;
+      case ir_intrinsic_image_atomic_dec_wrap:
+         op = nir_intrinsic_image_deref_atomic_dec_wrap;
+         break;
       case ir_intrinsic_memory_barrier:
          op = nir_intrinsic_memory_barrier;
          break;
@@ -685,54 +1074,21 @@ nir_visitor::visit(ir_call *ir)
          op = nir_intrinsic_image_deref_samples;
          break;
       case ir_intrinsic_ssbo_store:
-         op = nir_intrinsic_store_ssbo;
-         break;
       case ir_intrinsic_ssbo_load:
-         op = nir_intrinsic_load_ssbo;
-         break;
       case ir_intrinsic_ssbo_atomic_add:
-         op = ir->return_deref->type->is_integer_32_64()
-            ? nir_intrinsic_ssbo_atomic_add : nir_intrinsic_ssbo_atomic_fadd;
-         break;
       case ir_intrinsic_ssbo_atomic_and:
-         op = nir_intrinsic_ssbo_atomic_and;
-         break;
       case ir_intrinsic_ssbo_atomic_or:
-         op = nir_intrinsic_ssbo_atomic_or;
-         break;
       case ir_intrinsic_ssbo_atomic_xor:
-         op = nir_intrinsic_ssbo_atomic_xor;
-         break;
       case ir_intrinsic_ssbo_atomic_min:
-         assert(ir->return_deref);
-         if (ir->return_deref->type == glsl_type::int_type)
-            op = nir_intrinsic_ssbo_atomic_imin;
-         else if (ir->return_deref->type == glsl_type::uint_type)
-            op = nir_intrinsic_ssbo_atomic_umin;
-         else if (ir->return_deref->type == glsl_type::float_type)
-            op = nir_intrinsic_ssbo_atomic_fmin;
-         else
-            unreachable("Invalid type");
-         break;
       case ir_intrinsic_ssbo_atomic_max:
-         assert(ir->return_deref);
-         if (ir->return_deref->type == glsl_type::int_type)
-            op = nir_intrinsic_ssbo_atomic_imax;
-         else if (ir->return_deref->type == glsl_type::uint_type)
-            op = nir_intrinsic_ssbo_atomic_umax;
-         else if (ir->return_deref->type == glsl_type::float_type)
-            op = nir_intrinsic_ssbo_atomic_fmax;
-         else
-            unreachable("Invalid type");
-         break;
       case ir_intrinsic_ssbo_atomic_exchange:
-         op = nir_intrinsic_ssbo_atomic_exchange;
-         break;
       case ir_intrinsic_ssbo_atomic_comp_swap:
-         op = ir->return_deref->type->is_integer_32_64()
-            ? nir_intrinsic_ssbo_atomic_comp_swap
-            : nir_intrinsic_ssbo_atomic_fcomp_swap;
-         break;
+         /* SSBO store/loads should only have been lowered in GLSL IR for
+          * non-nir drivers, NIR drivers make use of gl_nir_lower_buffers()
+          * instead.
+          */
+         unreachable("Invalid operation nir doesn't want lowered ssbo "
+                     "store/loads");
       case ir_intrinsic_shader_clock:
          op = nir_intrinsic_shader_clock;
          break;
@@ -742,9 +1098,6 @@ nir_visitor::visit(ir_call *ir)
       case ir_intrinsic_end_invocation_interlock:
          op = nir_intrinsic_end_invocation_interlock;
          break;
-      case ir_intrinsic_begin_fragment_shader_ordering:
-         op = nir_intrinsic_begin_fragment_shader_ordering;
-         break;
       case ir_intrinsic_group_memory_barrier:
          op = nir_intrinsic_group_memory_barrier;
          break;
@@ -828,6 +1181,9 @@ nir_visitor::visit(ir_call *ir)
       case ir_intrinsic_read_first_invocation:
          op = nir_intrinsic_read_first_invocation;
          break;
+      case ir_intrinsic_helper_invocation:
+         op = nir_intrinsic_is_helper_invocation;
+         break;
       default:
          unreachable("not reached");
       }
@@ -836,6 +1192,65 @@ nir_visitor::visit(ir_call *ir)
       nir_ssa_def *ret = &instr->dest.ssa;
 
       switch (op) {
+      case nir_intrinsic_deref_atomic_add:
+      case nir_intrinsic_deref_atomic_imin:
+      case nir_intrinsic_deref_atomic_umin:
+      case nir_intrinsic_deref_atomic_imax:
+      case nir_intrinsic_deref_atomic_umax:
+      case nir_intrinsic_deref_atomic_and:
+      case nir_intrinsic_deref_atomic_or:
+      case nir_intrinsic_deref_atomic_xor:
+      case nir_intrinsic_deref_atomic_exchange:
+      case nir_intrinsic_deref_atomic_comp_swap:
+      case nir_intrinsic_deref_atomic_fadd:
+      case nir_intrinsic_deref_atomic_fmin:
+      case nir_intrinsic_deref_atomic_fmax:
+      case nir_intrinsic_deref_atomic_fcomp_swap: {
+         int param_count = ir->actual_parameters.length();
+         assert(param_count == 2 || param_count == 3);
+
+         /* Deref */
+         exec_node *param = ir->actual_parameters.get_head();
+         ir_rvalue *rvalue = (ir_rvalue *) param;
+         ir_dereference *deref = rvalue->as_dereference();
+         ir_swizzle *swizzle = NULL;
+         if (!deref) {
+            /* We may have a swizzle to pick off a single vec4 component */
+            swizzle = rvalue->as_swizzle();
+            assert(swizzle && swizzle->type->vector_elements == 1);
+            deref = swizzle->val->as_dereference();
+            assert(deref);
+         }
+         nir_deref_instr *nir_deref = evaluate_deref(deref);
+         if (swizzle) {
+            nir_deref = nir_build_deref_array_imm(&b, nir_deref,
+                                                  swizzle->mask.x);
+         }
+         instr->src[0] = nir_src_for_ssa(&nir_deref->dest.ssa);
+
+         nir_intrinsic_set_access(instr, deref_get_qualifier(nir_deref));
+
+         /* data1 parameter (this is always present) */
+         param = param->get_next();
+         ir_instruction *inst = (ir_instruction *) param;
+         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+         /* data2 parameter (only with atomic_comp_swap) */
+         if (param_count == 3) {
+            assert(op == nir_intrinsic_deref_atomic_comp_swap ||
+                   op == nir_intrinsic_deref_atomic_fcomp_swap);
+            param = param->get_next();
+            inst = (ir_instruction *) param;
+            instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+         }
+
+         /* Atomic result */
+         assert(ir->return_deref);
+         nir_ssa_dest_init(&instr->instr, &instr->dest,
+                           ir->return_deref->type->vector_elements, 32, NULL);
+         nir_builder_instr_insert(&b, &instr->instr);
+         break;
+      }
       case nir_intrinsic_atomic_counter_read_deref:
       case nir_intrinsic_atomic_counter_inc_deref:
       case nir_intrinsic_atomic_counter_pre_dec_deref:
@@ -878,8 +1293,10 @@ nir_visitor::visit(ir_call *ir)
       case nir_intrinsic_image_deref_load:
       case nir_intrinsic_image_deref_store:
       case nir_intrinsic_image_deref_atomic_add:
-      case nir_intrinsic_image_deref_atomic_min:
-      case nir_intrinsic_image_deref_atomic_max:
+      case nir_intrinsic_image_deref_atomic_imin:
+      case nir_intrinsic_image_deref_atomic_umin:
+      case nir_intrinsic_image_deref_atomic_imax:
+      case nir_intrinsic_image_deref_atomic_umax:
       case nir_intrinsic_image_deref_atomic_and:
       case nir_intrinsic_image_deref_atomic_or:
       case nir_intrinsic_image_deref_atomic_xor:
@@ -887,7 +1304,9 @@ nir_visitor::visit(ir_call *ir)
       case nir_intrinsic_image_deref_atomic_comp_swap:
       case nir_intrinsic_image_deref_atomic_fadd:
       case nir_intrinsic_image_deref_samples:
-      case nir_intrinsic_image_deref_size: {
+      case nir_intrinsic_image_deref_size:
+      case nir_intrinsic_image_deref_atomic_inc_wrap:
+      case nir_intrinsic_image_deref_atomic_dec_wrap: {
          nir_ssa_undef_instr *instr_undef =
             nir_ssa_undef_instr_create(shader, 1, 32);
          nir_builder_instr_insert(&b, &instr_undef->instr);
@@ -895,10 +1314,12 @@ nir_visitor::visit(ir_call *ir)
          /* Set the image variable dereference. */
          exec_node *param = ir->actual_parameters.get_head();
          ir_dereference *image = (ir_dereference *)param;
-         const glsl_type *type =
-            image->variable_referenced()->type->without_array();
+         nir_deref_instr *deref = evaluate_deref(image);
+         const glsl_type *type = deref->type;
 
-         instr->src[0] = nir_src_for_ssa(&evaluate_deref(image)->dest.ssa);
+         nir_intrinsic_set_access(instr, deref_get_qualifier(deref));
+
+         instr->src[0] = nir_src_for_ssa(&deref->dest.ssa);
          param = param->get_next();
 
          /* Set the intrinsic destination. */
@@ -954,13 +1375,18 @@ nir_visitor::visit(ir_call *ir)
             instr->src[3] =
                nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
             param = param->get_next();
+         } else if (op == nir_intrinsic_image_deref_load) {
+            instr->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */
          }
 
          if (!param->is_tail_sentinel()) {
             instr->src[4] =
                nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
             param = param->get_next();
+         } else if (op == nir_intrinsic_image_deref_store) {
+            instr->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */
          }
+
          nir_builder_instr_insert(&b, &instr->instr);
          break;
       }
@@ -974,7 +1400,7 @@ nir_visitor::visit(ir_call *ir)
          break;
       case nir_intrinsic_shader_clock:
          nir_ssa_dest_init(&instr->instr, &instr->dest, 2, 32, NULL);
-         instr->num_components = 2;
+         nir_intrinsic_set_memory_scope(instr, NIR_SCOPE_SUBGROUP);
          nir_builder_instr_insert(&b, &instr->instr);
          break;
       case nir_intrinsic_begin_invocation_interlock:
@@ -983,9 +1409,6 @@ nir_visitor::visit(ir_call *ir)
       case nir_intrinsic_end_invocation_interlock:
          nir_builder_instr_insert(&b, &instr->instr);
          break;
-      case nir_intrinsic_begin_fragment_shader_ordering:
-         nir_builder_instr_insert(&b, &instr->instr);
-         break;
       case nir_intrinsic_store_ssbo: {
          exec_node *param = ir->actual_parameters.get_head();
          ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
@@ -1001,97 +1424,19 @@ nir_visitor::visit(ir_call *ir)
          assert(write_mask);
 
          nir_ssa_def *nir_val = evaluate_rvalue(val);
-         assert(!val->type->is_boolean() || nir_val->bit_size == 32);
+         if (val->type->is_boolean())
+            nir_val = nir_b2i32(&b, nir_val);
 
          instr->src[0] = nir_src_for_ssa(nir_val);
          instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
          instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
+         intrinsic_set_std430_align(instr, val->type);
          nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
          instr->num_components = val->type->vector_elements;
 
          nir_builder_instr_insert(&b, &instr->instr);
          break;
       }
-      case nir_intrinsic_load_ssbo: {
-         exec_node *param = ir->actual_parameters.get_head();
-         ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
-
-         param = param->get_next();
-         ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
-
-         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block));
-         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
-
-         const glsl_type *type = ir->return_deref->var->type;
-         instr->num_components = type->vector_elements;
-
-         /* Setup destination register */
-         unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type);
-         nir_ssa_dest_init(&instr->instr, &instr->dest,
-                           type->vector_elements, bit_size, NULL);
-
-         /* Insert the created nir instruction now since in the case of boolean
-          * result we will need to emit another instruction after it
-          */
-         nir_builder_instr_insert(&b, &instr->instr);
-
-         /*
-          * In SSBO/UBO's, a true boolean value is any non-zero value, but we
-          * consider a true boolean to be ~0. Fix this up with a != 0
-          * comparison.
-          */
-         if (type->is_boolean())
-            ret = nir_i2b(&b, &instr->dest.ssa);
-         break;
-      }
-      case nir_intrinsic_ssbo_atomic_add:
-      case nir_intrinsic_ssbo_atomic_imin:
-      case nir_intrinsic_ssbo_atomic_umin:
-      case nir_intrinsic_ssbo_atomic_imax:
-      case nir_intrinsic_ssbo_atomic_umax:
-      case nir_intrinsic_ssbo_atomic_and:
-      case nir_intrinsic_ssbo_atomic_or:
-      case nir_intrinsic_ssbo_atomic_xor:
-      case nir_intrinsic_ssbo_atomic_exchange:
-      case nir_intrinsic_ssbo_atomic_comp_swap:
-      case nir_intrinsic_ssbo_atomic_fadd:
-      case nir_intrinsic_ssbo_atomic_fmin:
-      case nir_intrinsic_ssbo_atomic_fmax:
-      case nir_intrinsic_ssbo_atomic_fcomp_swap: {
-         int param_count = ir->actual_parameters.length();
-         assert(param_count == 3 || param_count == 4);
-
-         /* Block index */
-         exec_node *param = ir->actual_parameters.get_head();
-         ir_instruction *inst = (ir_instruction *) param;
-         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
-
-         /* Offset */
-         param = param->get_next();
-         inst = (ir_instruction *) param;
-         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
-
-         /* data1 parameter (this is always present) */
-         param = param->get_next();
-         inst = (ir_instruction *) param;
-         instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
-
-         /* data2 parameter (only with atomic_comp_swap) */
-         if (param_count == 4) {
-            assert(op == nir_intrinsic_ssbo_atomic_comp_swap ||
-                   op == nir_intrinsic_ssbo_atomic_fcomp_swap);
-            param = param->get_next();
-            inst = (ir_instruction *) param;
-            instr->src[3] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
-         }
-
-         /* Atomic result */
-         assert(ir->return_deref);
-         nir_ssa_dest_init(&instr->instr, &instr->dest,
-                           ir->return_deref->type->vector_elements, 32, NULL);
-         nir_builder_instr_insert(&b, &instr->instr);
-         break;
-      }
       case nir_intrinsic_load_shared: {
          exec_node *param = ir->actual_parameters.get_head();
          ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
@@ -1101,6 +1446,7 @@ nir_visitor::visit(ir_call *ir)
 
          const glsl_type *type = ir->return_deref->var->type;
          instr->num_components = type->vector_elements;
+         intrinsic_set_std430_align(instr, type);
 
          /* Setup destination register */
          unsigned bit_size = type->is_boolean() ? 32 : glsl_get_bit_size(type);
@@ -1108,6 +1454,10 @@ nir_visitor::visit(ir_call *ir)
                            type->vector_elements, bit_size, NULL);
 
          nir_builder_instr_insert(&b, &instr->instr);
+
+         /* The value in shared memory is a 32-bit value */
+         if (type->is_boolean())
+            ret = nir_b2b1(&b, &instr->dest.ssa);
          break;
       }
       case nir_intrinsic_store_shared: {
@@ -1127,10 +1477,13 @@ nir_visitor::visit(ir_call *ir)
          nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
 
          nir_ssa_def *nir_val = evaluate_rvalue(val);
-         assert(!val->type->is_boolean() || nir_val->bit_size == 32);
+         /* The value in shared memory is a 32-bit value */
+         if (val->type->is_boolean())
+            nir_val = nir_b2b32(&b, nir_val);
 
          instr->src[0] = nir_src_for_ssa(nir_val);
          instr->num_components = val->type->vector_elements;
+         intrinsic_set_std430_align(instr, val->type);
 
          nir_builder_instr_insert(&b, &instr->instr);
          break;
@@ -1181,11 +1534,12 @@ nir_visitor::visit(ir_call *ir)
          nir_builder_instr_insert(&b, &instr->instr);
          break;
       }
-      case nir_intrinsic_vote_any:
-      case nir_intrinsic_vote_all:
-      case nir_intrinsic_vote_ieq: {
-         nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
+      case nir_intrinsic_vote_ieq:
          instr->num_components = 1;
+         /* fall-through */
+      case nir_intrinsic_vote_any:
+      case nir_intrinsic_vote_all: {
+         nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL);
 
          ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
          instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));
@@ -1230,6 +1584,11 @@ nir_visitor::visit(ir_call *ir)
          nir_builder_instr_insert(&b, &instr->instr);
          break;
       }
+      case nir_intrinsic_is_helper_invocation: {
+         nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL);
+         nir_builder_instr_insert(&b, &instr->instr);
+         break;
+      }
       default:
          unreachable("not reached");
       }
@@ -1240,7 +1599,47 @@ nir_visitor::visit(ir_call *ir)
       return;
    }
 
-   unreachable("glsl_to_nir only handles function calls to intrinsics");
+   struct hash_entry *entry =
+      _mesa_hash_table_search(this->overload_table, ir->callee);
+   assert(entry);
+   nir_function *callee = (nir_function *) entry->data;
+
+   nir_call_instr *call = nir_call_instr_create(this->shader, callee);
+
+   unsigned i = 0;
+   nir_deref_instr *ret_deref = NULL;
+   if (ir->return_deref) {
+      nir_variable *ret_tmp =
+         nir_local_variable_create(this->impl, ir->return_deref->type,
+                                   "return_tmp");
+      ret_deref = nir_build_deref_var(&b, ret_tmp);
+      call->params[i++] = nir_src_for_ssa(&ret_deref->dest.ssa);
+   }
+
+   foreach_two_lists(formal_node, &ir->callee->parameters,
+                     actual_node, &ir->actual_parameters) {
+      ir_rvalue *param_rvalue = (ir_rvalue *) actual_node;
+      ir_variable *sig_param = (ir_variable *) formal_node;
+
+      if (sig_param->data.mode == ir_var_function_out) {
+         nir_deref_instr *out_deref = evaluate_deref(param_rvalue);
+         call->params[i] = nir_src_for_ssa(&out_deref->dest.ssa);
+      } else if (sig_param->data.mode == ir_var_function_in) {
+         nir_ssa_def *val = evaluate_rvalue(param_rvalue);
+         nir_src src = nir_src_for_ssa(val);
+
+         nir_src_copy(&call->params[i], &src, call);
+      } else if (sig_param->data.mode == ir_var_function_inout) {
+         unreachable("unimplemented: inout parameters");
+      }
+
+      i++;
+   }
+
+   nir_builder_instr_insert(&b, &call->instr);
+
+   if (ir->return_deref)
+      nir_store_deref(&b, evaluate_deref(ir->return_deref), nir_load_deref(&b, ret_deref), ~0);
 }
 
 void
@@ -1253,12 +1652,18 @@ nir_visitor::visit(ir_assignment *ir)
 
    if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&
        (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) {
+      nir_deref_instr *lhs = evaluate_deref(ir->lhs);
+      nir_deref_instr *rhs = evaluate_deref(ir->rhs);
+      enum gl_access_qualifier lhs_qualifiers = deref_get_qualifier(lhs);
+      enum gl_access_qualifier rhs_qualifiers = deref_get_qualifier(rhs);
       if (ir->condition) {
          nir_push_if(&b, evaluate_rvalue(ir->condition));
-         nir_copy_deref(&b, evaluate_deref(ir->lhs), evaluate_deref(ir->rhs));
+         nir_copy_deref_with_access(&b, lhs, rhs, lhs_qualifiers,
+                                    rhs_qualifiers);
          nir_pop_if(&b, NULL);
       } else {
-         nir_copy_deref(&b, evaluate_deref(ir->lhs), evaluate_deref(ir->rhs));
+         nir_copy_deref_with_access(&b, lhs, rhs, lhs_qualifiers,
+                                    rhs_qualifiers);
       }
       return;
    }
@@ -1280,15 +1685,18 @@ nir_visitor::visit(ir_assignment *ir)
       for (unsigned i = 0; i < 4; i++) {
          swiz[i] = ir->write_mask & (1 << i) ? component++ : 0;
       }
-      src = nir_swizzle(&b, src, swiz, num_components, !supports_ints);
+      src = nir_swizzle(&b, src, swiz, num_components);
    }
 
+   enum gl_access_qualifier qualifiers = deref_get_qualifier(lhs_deref);
    if (ir->condition) {
       nir_push_if(&b, evaluate_rvalue(ir->condition));
-      nir_store_deref(&b, lhs_deref, src, ir->write_mask);
+      nir_store_deref_with_access(&b, lhs_deref, src, ir->write_mask,
+                                  qualifiers);
       nir_pop_if(&b, NULL);
    } else {
-      nir_store_deref(&b, lhs_deref, src, ir->write_mask);
+      nir_store_deref_with_access(&b, lhs_deref, src, ir->write_mask,
+                                  qualifiers);
    }
 }
 
@@ -1355,7 +1763,8 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
        * must emit a variable load.
        */
 
-      this->result = nir_load_deref(&b, this->deref);
+      enum gl_access_qualifier access = deref_get_qualifier(this->deref);
+      this->result = nir_load_deref_with_access(&b, this->deref, access);
    }
 
    return this->result;
@@ -1380,27 +1789,6 @@ nir_visitor::visit(ir_expression *ir)
 {
    /* Some special cases */
    switch (ir->operation) {
-   case ir_binop_ubo_load: {
-      nir_intrinsic_instr *load =
-         nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo);
-      unsigned bit_size = ir->type->is_boolean() ? 32 :
-                          glsl_get_bit_size(ir->type);
-      load->num_components = ir->type->vector_elements;
-      load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
-      load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
-      add_instr(&load->instr, ir->type->vector_elements, bit_size);
-
-      /*
-       * In UBO's, a true boolean value is any non-zero value, but we consider
-       * a true boolean to be ~0. Fix this up with a != 0 comparison.
-       */
-
-      if (ir->type->is_boolean())
-         this->result = nir_i2b(&b, &load->dest.ssa);
-
-      return;
-   }
-
    case ir_unop_interpolate_at_centroid:
    case ir_binop_interpolate_at_offset:
    case ir_binop_interpolate_at_sample: {
@@ -1440,7 +1828,7 @@ nir_visitor::visit(ir_expression *ir)
           * sense, we'll just turn it into a load which will probably
           * eventually end up as an SSA definition.
           */
-         assert(this->deref->mode == nir_var_global);
+         assert(this->deref->mode == nir_var_shader_temp);
          op = nir_intrinsic_load_deref;
       }
 
@@ -1461,12 +1849,29 @@ nir_visitor::visit(ir_expression *ir)
          };
 
          result = nir_swizzle(&b, result, swiz,
-                              swizzle->type->vector_elements, false);
+                              swizzle->type->vector_elements);
       }
 
       return;
    }
 
+   case ir_unop_ssbo_unsized_array_length: {
+      nir_intrinsic_instr *intrin =
+         nir_intrinsic_instr_create(b.shader,
+                                    nir_intrinsic_deref_buffer_array_length);
+
+      ir_dereference *deref = ir->operands[0]->as_dereference();
+      intrin->src[0] = nir_src_for_ssa(&evaluate_deref(deref)->dest.ssa);
+
+      add_instr(&intrin->instr, 1, 32);
+      return;
+   }
+
+   case ir_binop_ubo_load:
+      /* UBO loads should only have been lowered in GLSL IR for non-nir drivers,
+       * NIR drivers make use of gl_nir_lower_buffers() instead.
+       */
+      unreachable("Invalid operation nir doesn't want lowered ubo loads");
    default:
       break;
    }
@@ -1477,21 +1882,14 @@ nir_visitor::visit(ir_expression *ir)
 
    glsl_base_type types[4];
    for (unsigned i = 0; i < ir->num_operands; i++)
-      if (supports_ints)
-         types[i] = ir->operands[i]->type->base_type;
-      else
-         types[i] = GLSL_TYPE_FLOAT;
+      types[i] = ir->operands[i]->type->base_type;
 
-   glsl_base_type out_type;
-   if (supports_ints)
-      out_type = ir->type->base_type;
-   else
-      out_type = GLSL_TYPE_FLOAT;
+   glsl_base_type out_type = ir->type->base_type;
 
    switch (ir->operation) {
    case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break;
    case ir_unop_logic_not:
-      result = supports_ints ? nir_inot(&b, srcs[0]) : nir_fnot(&b, srcs[0]);
+      result = nir_inot(&b, srcs[0]);
       break;
    case ir_unop_neg:
       result = type_is_float(types[0]) ? nir_fneg(&b, srcs[0])
@@ -1501,6 +1899,9 @@ nir_visitor::visit(ir_expression *ir)
       result = type_is_float(types[0]) ? nir_fabs(&b, srcs[0])
                                        : nir_iabs(&b, srcs[0]);
       break;
+   case ir_unop_clz:
+      result = nir_uclz(&b, srcs[0]);
+      break;
    case ir_unop_saturate:
       assert(type_is_float(types[0]));
       result = nir_fsat(&b, srcs[0]);
@@ -1517,14 +1918,8 @@ nir_visitor::visit(ir_expression *ir)
    case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break;
    case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break;
    case ir_unop_i2f:
-      result = supports_ints ? nir_i2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
-      break;
    case ir_unop_u2f:
-      result = supports_ints ? nir_u2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
-      break;
    case ir_unop_b2f:
-      result = supports_ints ? nir_b2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
-      break;
    case ir_unop_f2i:
    case ir_unop_f2u:
    case ir_unop_f2b:
@@ -1533,6 +1928,12 @@ nir_visitor::visit(ir_expression *ir)
    case ir_unop_b2i64:
    case ir_unop_d2f:
    case ir_unop_f2d:
+   case ir_unop_f162f:
+   case ir_unop_f2f16:
+   case ir_unop_f162b:
+   case ir_unop_b2f16:
+   case ir_unop_i2i:
+   case ir_unop_u2u:
    case ir_unop_d2i:
    case ir_unop_d2u:
    case ir_unop_d2b:
@@ -1571,6 +1972,21 @@ nir_visitor::visit(ir_expression *ir)
       break;
    }
 
+   case ir_unop_f2fmp: {
+      result = nir_build_alu(&b, nir_op_f2fmp, srcs[0], NULL, NULL, NULL);
+      break;
+   }
+
+   case ir_unop_i2imp: {
+      result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL);
+      break;
+   }
+
+   case ir_unop_u2ump: {
+      result = nir_build_alu(&b, nir_op_u2ump, srcs[0], NULL, NULL, NULL);
+      break;
+   }
+
    case ir_unop_bitcast_i2f:
    case ir_unop_bitcast_f2i:
    case ir_unop_bitcast_u2f:
@@ -1581,7 +1997,7 @@ nir_visitor::visit(ir_expression *ir)
    case ir_unop_bitcast_d2u64:
    case ir_unop_subroutine_to_int:
       /* no-op */
-      result = nir_imov(&b, srcs[0]);
+      result = nir_mov(&b, srcs[0]);
       break;
    case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break;
    case ir_unop_ceil:  result = nir_fceil(&b, srcs[0]); break;
@@ -1664,48 +2080,6 @@ nir_visitor::visit(ir_expression *ir)
       result = nir_find_lsb(&b, srcs[0]);
       break;
 
-   case ir_unop_noise:
-      switch (ir->type->vector_elements) {
-      case 1:
-         switch (ir->operands[0]->type->vector_elements) {
-            case 1: result = nir_fnoise1_1(&b, srcs[0]); break;
-            case 2: result = nir_fnoise1_2(&b, srcs[0]); break;
-            case 3: result = nir_fnoise1_3(&b, srcs[0]); break;
-            case 4: result = nir_fnoise1_4(&b, srcs[0]); break;
-            default: unreachable("not reached");
-         }
-         break;
-      case 2:
-         switch (ir->operands[0]->type->vector_elements) {
-            case 1: result = nir_fnoise2_1(&b, srcs[0]); break;
-            case 2: result = nir_fnoise2_2(&b, srcs[0]); break;
-            case 3: result = nir_fnoise2_3(&b, srcs[0]); break;
-            case 4: result = nir_fnoise2_4(&b, srcs[0]); break;
-            default: unreachable("not reached");
-         }
-         break;
-      case 3:
-         switch (ir->operands[0]->type->vector_elements) {
-            case 1: result = nir_fnoise3_1(&b, srcs[0]); break;
-            case 2: result = nir_fnoise3_2(&b, srcs[0]); break;
-            case 3: result = nir_fnoise3_3(&b, srcs[0]); break;
-            case 4: result = nir_fnoise3_4(&b, srcs[0]); break;
-            default: unreachable("not reached");
-         }
-         break;
-      case 4:
-         switch (ir->operands[0]->type->vector_elements) {
-            case 1: result = nir_fnoise4_1(&b, srcs[0]); break;
-            case 2: result = nir_fnoise4_2(&b, srcs[0]); break;
-            case 3: result = nir_fnoise4_3(&b, srcs[0]); break;
-            case 4: result = nir_fnoise4_4(&b, srcs[0]); break;
-            default: unreachable("not reached");
-         }
-         break;
-      default:
-         unreachable("not reached");
-      }
-      break;
    case ir_unop_get_buffer_size: {
       nir_intrinsic_instr *load = nir_intrinsic_instr_create(
          this->shader,
@@ -1717,17 +2091,58 @@ nir_visitor::visit(ir_expression *ir)
       return;
    }
 
+   case ir_unop_atan:
+      result = nir_atan(&b, srcs[0]);
+      break;
+
    case ir_binop_add:
       result = type_is_float(out_type) ? nir_fadd(&b, srcs[0], srcs[1])
                                        : nir_iadd(&b, srcs[0], srcs[1]);
       break;
+   case ir_binop_add_sat:
+      result = type_is_signed(out_type) ? nir_iadd_sat(&b, srcs[0], srcs[1])
+                                        : nir_uadd_sat(&b, srcs[0], srcs[1]);
+      break;
    case ir_binop_sub:
       result = type_is_float(out_type) ? nir_fsub(&b, srcs[0], srcs[1])
                                        : nir_isub(&b, srcs[0], srcs[1]);
       break;
+   case ir_binop_sub_sat:
+      result = type_is_signed(out_type) ? nir_isub_sat(&b, srcs[0], srcs[1])
+                                        : nir_usub_sat(&b, srcs[0], srcs[1]);
+      break;
+   case ir_binop_abs_sub:
+      /* out_type is always unsigned for ir_binop_abs_sub, so we have to key
+       * on the type of the sources.
+       */
+      result = type_is_signed(types[0]) ? nir_uabs_isub(&b, srcs[0], srcs[1])
+                                        : nir_uabs_usub(&b, srcs[0], srcs[1]);
+      break;
+   case ir_binop_avg:
+      result = type_is_signed(out_type) ? nir_ihadd(&b, srcs[0], srcs[1])
+                                        : nir_uhadd(&b, srcs[0], srcs[1]);
+      break;
+   case ir_binop_avg_round:
+      result = type_is_signed(out_type) ? nir_irhadd(&b, srcs[0], srcs[1])
+                                        : nir_urhadd(&b, srcs[0], srcs[1]);
+      break;
+   case ir_binop_mul_32x16:
+      result = type_is_signed(out_type) ? nir_imul_32x16(&b, srcs[0], srcs[1])
+                                        : nir_umul_32x16(&b, srcs[0], srcs[1]);
+      break;
    case ir_binop_mul:
-      result = type_is_float(out_type) ? nir_fmul(&b, srcs[0], srcs[1])
-                                       : nir_imul(&b, srcs[0], srcs[1]);
+      if (type_is_float(out_type))
+         result = nir_fmul(&b, srcs[0], srcs[1]);
+      else if (out_type == GLSL_TYPE_INT64 &&
+               (ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
+                ir->operands[1]->type->base_type == GLSL_TYPE_INT))
+         result = nir_imul_2x32_64(&b, srcs[0], srcs[1]);
+      else if (out_type == GLSL_TYPE_UINT64 &&
+               (ir->operands[0]->type->base_type == GLSL_TYPE_UINT ||
+                ir->operands[1]->type->base_type == GLSL_TYPE_UINT))
+         result = nir_umul_2x32_64(&b, srcs[0], srcs[1]);
+      else
+         result = nir_imul(&b, srcs[0], srcs[1]);
       break;
    case ir_binop_div:
       if (type_is_float(out_type))
@@ -1762,16 +2177,13 @@ nir_visitor::visit(ir_expression *ir)
    case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break;
    case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break;
    case ir_binop_logic_and:
-      result = supports_ints ? nir_iand(&b, srcs[0], srcs[1])
-                             : nir_fand(&b, srcs[0], srcs[1]);
+      result = nir_iand(&b, srcs[0], srcs[1]);
       break;
    case ir_binop_logic_or:
-      result = supports_ints ? nir_ior(&b, srcs[0], srcs[1])
-                             : nir_for(&b, srcs[0], srcs[1]);
+      result = nir_ior(&b, srcs[0], srcs[1]);
       break;
    case ir_binop_logic_xor:
-      result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1])
-                             : nir_fxor(&b, srcs[0], srcs[1]);
+      result = nir_ixor(&b, srcs[0], srcs[1]);
       break;
    case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break;
    case ir_binop_rshift:
@@ -1785,108 +2197,70 @@ nir_visitor::visit(ir_expression *ir)
    case ir_binop_carry:  result = nir_uadd_carry(&b, srcs[0], srcs[1]);  break;
    case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break;
    case ir_binop_less:
-      if (supports_ints) {
-         if (type_is_float(types[0]))
-            result = nir_flt(&b, srcs[0], srcs[1]);
-         else if (type_is_signed(types[0]))
-            result = nir_ilt(&b, srcs[0], srcs[1]);
-         else
-            result = nir_ult(&b, srcs[0], srcs[1]);
-      } else {
-         result = nir_slt(&b, srcs[0], srcs[1]);
-      }
+      if (type_is_float(types[0]))
+         result = nir_flt(&b, srcs[0], srcs[1]);
+      else if (type_is_signed(types[0]))
+         result = nir_ilt(&b, srcs[0], srcs[1]);
+      else
+         result = nir_ult(&b, srcs[0], srcs[1]);
       break;
    case ir_binop_gequal:
-      if (supports_ints) {
-         if (type_is_float(types[0]))
-            result = nir_fge(&b, srcs[0], srcs[1]);
-         else if (type_is_signed(types[0]))
-            result = nir_ige(&b, srcs[0], srcs[1]);
-         else
-            result = nir_uge(&b, srcs[0], srcs[1]);
-      } else {
-         result = nir_sge(&b, srcs[0], srcs[1]);
-      }
+      if (type_is_float(types[0]))
+         result = nir_fge(&b, srcs[0], srcs[1]);
+      else if (type_is_signed(types[0]))
+         result = nir_ige(&b, srcs[0], srcs[1]);
+      else
+         result = nir_uge(&b, srcs[0], srcs[1]);
       break;
    case ir_binop_equal:
-      if (supports_ints) {
-         if (type_is_float(types[0]))
-            result = nir_feq(&b, srcs[0], srcs[1]);
-         else
-            result = nir_ieq(&b, srcs[0], srcs[1]);
-      } else {
-         result = nir_seq(&b, srcs[0], srcs[1]);
-      }
+      if (type_is_float(types[0]))
+         result = nir_feq(&b, srcs[0], srcs[1]);
+      else
+         result = nir_ieq(&b, srcs[0], srcs[1]);
       break;
    case ir_binop_nequal:
-      if (supports_ints) {
-         if (type_is_float(types[0]))
-            result = nir_fne(&b, srcs[0], srcs[1]);
-         else
-            result = nir_ine(&b, srcs[0], srcs[1]);
-      } else {
-         result = nir_sne(&b, srcs[0], srcs[1]);
-      }
+      if (type_is_float(types[0]))
+         result = nir_fne(&b, srcs[0], srcs[1]);
+      else
+         result = nir_ine(&b, srcs[0], srcs[1]);
       break;
    case ir_binop_all_equal:
-      if (supports_ints) {
-         if (type_is_float(types[0])) {
-            switch (ir->operands[0]->type->vector_elements) {
-               case 1: result = nir_feq(&b, srcs[0], srcs[1]); break;
-               case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break;
-               case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break;
-               case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break;
-               default:
-                  unreachable("not reached");
-            }
-         } else {
-            switch (ir->operands[0]->type->vector_elements) {
-               case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break;
-               case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break;
-               case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break;
-               case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break;
-               default:
-                  unreachable("not reached");
-            }
+      if (type_is_float(types[0])) {
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: result = nir_feq(&b, srcs[0], srcs[1]); break;
+            case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break;
+            case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break;
+            case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break;
+            default:
+               unreachable("not reached");
          }
       } else {
          switch (ir->operands[0]->type->vector_elements) {
-            case 1: result = nir_seq(&b, srcs[0], srcs[1]); break;
-            case 2: result = nir_fall_equal2(&b, srcs[0], srcs[1]); break;
-            case 3: result = nir_fall_equal3(&b, srcs[0], srcs[1]); break;
-            case 4: result = nir_fall_equal4(&b, srcs[0], srcs[1]); break;
+            case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break;
+            case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break;
+            case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break;
+            case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break;
             default:
                unreachable("not reached");
          }
       }
       break;
    case ir_binop_any_nequal:
-      if (supports_ints) {
-         if (type_is_float(types[0])) {
-            switch (ir->operands[0]->type->vector_elements) {
-               case 1: result = nir_fne(&b, srcs[0], srcs[1]); break;
-               case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
-               case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break;
-               case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break;
-               default:
-                  unreachable("not reached");
-            }
-         } else {
-            switch (ir->operands[0]->type->vector_elements) {
-               case 1: result = nir_ine(&b, srcs[0], srcs[1]); break;
-               case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break;
-               case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break;
-               case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break;
-               default:
-                  unreachable("not reached");
-            }
+      if (type_is_float(types[0])) {
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: result = nir_fne(&b, srcs[0], srcs[1]); break;
+            case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
+            case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break;
+            case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break;
+            default:
+               unreachable("not reached");
          }
       } else {
          switch (ir->operands[0]->type->vector_elements) {
-            case 1: result = nir_sne(&b, srcs[0], srcs[1]); break;
-            case 2: result = nir_fany_nequal2(&b, srcs[0], srcs[1]); break;
-            case 3: result = nir_fany_nequal3(&b, srcs[0], srcs[1]); break;
-            case 4: result = nir_fany_nequal4(&b, srcs[0], srcs[1]); break;
+            case 1: result = nir_ine(&b, srcs[0], srcs[1]); break;
+            case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break;
+            case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break;
+            case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break;
             default:
                unreachable("not reached");
          }
@@ -1911,6 +2285,10 @@ nir_visitor::visit(ir_expression *ir)
       break;
    }
 
+   case ir_binop_atan2:
+      result = nir_atan2(&b, srcs[0], srcs[1]);
+      break;
+
    case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
    case ir_triop_fma:
       result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);
@@ -1919,18 +2297,17 @@ nir_visitor::visit(ir_expression *ir)
       result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]);
       break;
    case ir_triop_csel:
-      if (supports_ints)
-         result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]);
-      else
-         result = nir_fcsel(&b, srcs[0], srcs[1], srcs[2]);
+      result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]);
       break;
    case ir_triop_bitfield_extract:
-      result = (out_type == GLSL_TYPE_INT) ?
-         nir_ibitfield_extract(&b, srcs[0], srcs[1], srcs[2]) :
-         nir_ubitfield_extract(&b, srcs[0], srcs[1], srcs[2]);
+      result = ir->type->is_int_16_32() ?
+         nir_ibitfield_extract(&b, nir_i2i32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2])) :
+         nir_ubitfield_extract(&b, nir_u2u32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2]));
       break;
    case ir_quadop_bitfield_insert:
-      result = nir_bitfield_insert(&b, srcs[0], srcs[1], srcs[2], srcs[3]);
+      result = nir_bitfield_insert(&b,
+                                   nir_u2u32(&b, srcs[0]), nir_u2u32(&b, srcs[1]),
+                                   nir_i2i32(&b, srcs[2]), nir_i2i32(&b, srcs[3]));
       break;
    case ir_quadop_vector:
       result = nir_vec(&b, srcs, ir->type->vector_elements);
@@ -1946,7 +2323,7 @@ nir_visitor::visit(ir_swizzle *ir)
 {
    unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w };
    result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle,
-                        ir->type->vector_elements, !supports_ints);
+                        ir->type->vector_elements);
 }
 
 void
@@ -2025,7 +2402,8 @@ nir_visitor::visit(ir_texture *ir)
       num_srcs++;
    if (ir->shadow_comparator != NULL)
       num_srcs++;
-   if (ir->offset != NULL)
+   /* offsets are constants we store inside nir_tex_intrs.offsets */
+   if (ir->offset != NULL && !ir->offset->type->is_array())
       num_srcs++;
 
    /* Add one for the texture deref */
@@ -2044,6 +2422,15 @@ nir_visitor::visit(ir_texture *ir)
    case GLSL_TYPE_FLOAT:
       instr->dest_type = nir_type_float;
       break;
+   case GLSL_TYPE_FLOAT16:
+      instr->dest_type = nir_type_float16;
+      break;
+   case GLSL_TYPE_INT16:
+      instr->dest_type = nir_type_int16;
+      break;
+   case GLSL_TYPE_UINT16:
+      instr->dest_type = nir_type_uint16;
+      break;
    case GLSL_TYPE_INT:
       instr->dest_type = nir_type_int;
       break;
@@ -2056,10 +2443,21 @@ nir_visitor::visit(ir_texture *ir)
    }
 
    nir_deref_instr *sampler_deref = evaluate_deref(ir->sampler);
-   instr->src[0].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
-   instr->src[0].src_type = nir_tex_src_texture_deref;
-   instr->src[1].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
-   instr->src[1].src_type = nir_tex_src_sampler_deref;
+
+   /* check for bindless handles */
+   if (sampler_deref->mode != nir_var_uniform ||
+       nir_deref_instr_get_variable(sampler_deref)->data.bindless) {
+      nir_ssa_def *load = nir_load_deref(&b, sampler_deref);
+      instr->src[0].src = nir_src_for_ssa(load);
+      instr->src[0].src_type = nir_tex_src_texture_handle;
+      instr->src[1].src = nir_src_for_ssa(load);
+      instr->src[1].src_type = nir_tex_src_sampler_handle;
+   } else {
+      instr->src[0].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
+      instr->src[0].src_type = nir_tex_src_texture_deref;
+      instr->src[1].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
+      instr->src[1].src_type = nir_tex_src_sampler_deref;
+   }
 
    unsigned src_number = 2;
 
@@ -2086,13 +2484,25 @@ nir_visitor::visit(ir_texture *ir)
    }
 
    if (ir->offset != NULL) {
-      /* we don't support multiple offsets yet */
-      assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());
+      if (ir->offset->type->is_array()) {
+         for (int i = 0; i < ir->offset->type->array_size(); i++) {
+            const ir_constant *c =
+               ir->offset->as_constant()->get_array_element(i);
+
+            for (unsigned j = 0; j < 2; ++j) {
+               int val = c->get_int_component(j);
+               assert(val <= 31 && val >= -32);
+               instr->tg4_offsets[i][j] = val;
+            }
+         }
+      } else {
+         assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());
 
-      instr->src[src_number].src =
-         nir_src_for_ssa(evaluate_rvalue(ir->offset));
-      instr->src[src_number].src_type = nir_tex_src_offset;
-      src_number++;
+         instr->src[src_number].src =
+            nir_src_for_ssa(evaluate_rvalue(ir->offset));
+         instr->src[src_number].src_type = nir_tex_src_offset;
+         src_number++;
+      }
    }
 
    switch (ir->op) {
@@ -2166,6 +2576,23 @@ nir_visitor::visit(ir_constant *ir)
 void
 nir_visitor::visit(ir_dereference_variable *ir)
 {
+   if (ir->variable_referenced()->data.mode == ir_var_function_out) {
+      unsigned i = (sig->return_type != glsl_type::void_type) ? 1 : 0;
+
+      foreach_in_list(ir_variable, param, &sig->parameters) {
+         if (param == ir->variable_referenced()) {
+            break;
+         }
+         i++;
+      }
+
+      this->deref = nir_build_deref_cast(&b, nir_load_param(&b, i),
+                                         nir_var_function_temp, ir->type, 0);
+      return;
+   }
+
+   assert(ir->variable_referenced()->data.mode != ir_var_function_inout);
+
    struct hash_entry *entry =
       _mesa_hash_table_search(this->var_table, ir->var);
    assert(entry);
@@ -2198,7 +2625,74 @@ nir_visitor::visit(ir_dereference_array *ir)
 void
 nir_visitor::visit(ir_barrier *)
 {
+   if (shader->info.stage == MESA_SHADER_COMPUTE) {
+      nir_intrinsic_instr *shared_barrier =
+         nir_intrinsic_instr_create(this->shader,
+                                    nir_intrinsic_memory_barrier_shared);
+      nir_builder_instr_insert(&b, &shared_barrier->instr);
+   } else if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
+      nir_intrinsic_instr *patch_barrier =
+         nir_intrinsic_instr_create(this->shader,
+                                    nir_intrinsic_memory_barrier_tcs_patch);
+      nir_builder_instr_insert(&b, &patch_barrier->instr);
+   }
+
    nir_intrinsic_instr *instr =
-      nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier);
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_control_barrier);
    nir_builder_instr_insert(&b, &instr->instr);
 }
+
+nir_shader *
+glsl_float64_funcs_to_nir(struct gl_context *ctx,
+                          const nir_shader_compiler_options *options)
+{
+   /* We pretend it's a vertex shader.  Ultimately, the stage shouldn't
+    * matter because we're not optimizing anything here.
+    */
+   struct gl_shader *sh = _mesa_new_shader(-1, MESA_SHADER_VERTEX);
+   sh->Source = float64_source;
+   sh->CompileStatus = COMPILE_FAILURE;
+   _mesa_glsl_compile_shader(ctx, sh, false, false, true);
+
+   if (!sh->CompileStatus) {
+      if (sh->InfoLog) {
+         _mesa_problem(ctx,
+                       "fp64 software impl compile failed:\n%s\nsource:\n%s\n",
+                       sh->InfoLog, float64_source);
+      }
+      return NULL;
+   }
+
+   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_VERTEX, options, NULL);
+
+   nir_visitor v1(ctx, nir);
+   nir_function_visitor v2(&v1);
+   v2.run(sh->ir);
+   visit_exec_list(sh->ir, &v1);
+
+   /* _mesa_delete_shader will try to free sh->Source but it's static const */
+   sh->Source = NULL;
+   _mesa_delete_shader(ctx, sh);
+
+   nir_validate_shader(nir, "float64_funcs_to_nir");
+
+   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
+   NIR_PASS_V(nir, nir_lower_returns);
+   NIR_PASS_V(nir, nir_inline_functions);
+   NIR_PASS_V(nir, nir_opt_deref);
+
+   /* Do some optimizations to clean up the shader now.  By optimizing the
+    * functions in the library, we avoid having to re-do that work every
+    * time we inline a copy of a function.  Reducing basic blocks also helps
+    * with compile times.
+    */
+   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+   NIR_PASS_V(nir, nir_copy_prop);
+   NIR_PASS_V(nir, nir_opt_dce);
+   NIR_PASS_V(nir, nir_opt_cse);
+   NIR_PASS_V(nir, nir_opt_gcm, true);
+   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
+   NIR_PASS_V(nir, nir_opt_dce);
+
+   return nir;
+}