nir: Take a mode in remove_unused_io_vars

[mesa.git] / src / compiler / nir / nir.h
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h

index f4bd85f9255df9df59cc8da09623cad746bb0538..02d0a6000dae0807e584af69c27e88c9ad848fcc 100644 (file)
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -37,9 +37,12 @@
  #include "util/bitscan.h"
  #include "util/bitset.h"
  #include "util/macros.h"
+#include "util/format/u_format.h"
  #include "compiler/nir_types.h"
  #include "compiler/shader_enums.h"
  #include "compiler/shader_info.h"
+#define XXH_INLINE_ALL
+#include "util/xxhash.h"
  #include <stdio.h>
  
  #ifndef NDEBUG
@@ -58,9 +61,19 @@ extern "C" {
  
  #define NIR_FALSE 0u
  #define NIR_TRUE (~0u)
-#define NIR_MAX_VEC_COMPONENTS 4
+#define NIR_MAX_VEC_COMPONENTS 16
  #define NIR_MAX_MATRIX_COLUMNS 4
-typedef uint8_t nir_component_mask_t;
+#define NIR_STREAM_PACKED (1 << 8)
+typedef uint16_t nir_component_mask_t;
+
+static inline bool
+nir_num_components_valid(unsigned num_components)
+{
+   return (num_components >= 1  &&
+           num_components <= 4) ||   /* 1 through 4 components */
+           num_components == 8  ||   /* plus the two wider sizes: 8 ... */
+           num_components == 16;     /* ... and 16 */
+}
  
  /** Defines a cast function
   *
@@ -92,7 +105,7 @@ struct nir_builder;
   */
  typedef struct {
     gl_state_index16 tokens[STATE_LENGTH];
-   int swizzle;
+   uint16_t swizzle;
  } nir_state_slot;
  
  typedef enum {
@@ -106,7 +119,9 @@ typedef enum {
     nir_var_mem_ssbo        = (1 << 7),
     nir_var_mem_shared      = (1 << 8),
     nir_var_mem_global      = (1 << 9),
-   nir_var_all             = ~0,
+   nir_var_mem_push_const  = (1 << 10), /* not actually used for variables */
+   nir_num_variable_modes  = 11,
+   nir_var_all             = (1 << nir_num_variable_modes) - 1,
  } nir_variable_mode;
  
  /**
@@ -140,6 +155,106 @@ typedef union {
        arr[i] = c[i].m; \
  } while (false)
  
+static inline nir_const_value
+nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size)
+{
+   nir_const_value v;
+   memset(&v, 0, sizeof(v)); /* clear everything; only one field is set below */
+
+   switch (bit_size) { /* pick the field for bit_size; x is not range-checked here */
+   case 1:  v.b   = x;  break;
+   case 8:  v.u8  = x;  break;
+   case 16: v.u16 = x;  break;
+   case 32: v.u32 = x;  break;
+   case 64: v.u64 = x;  break;
+   default:
+      unreachable("Invalid bit size");
+   }
+
+   return v;
+}
+
+/* Build a constant from a signed integer: asserts that \p i is representable
+ * in \p bit_size bits, then defers to nir_const_value_for_raw_uint().
+ */
+static inline nir_const_value
+nir_const_value_for_int(int64_t i, unsigned bit_size)
+{
+   assert(bit_size >= 1 && bit_size <= 64); /* 0 would make the shifts below undefined */
+   if (bit_size < 64) {
+      assert(i >= (-(1ll << (bit_size - 1))));
+      assert(i < (1ll << (bit_size - 1)));
+   }
+
+   return nir_const_value_for_raw_uint(i, bit_size);
+}
+
+/* Build a constant from an unsigned integer: asserts that \p u fits in
+ * \p bit_size bits, then defers to nir_const_value_for_raw_uint().
+ */
+static inline nir_const_value
+nir_const_value_for_uint(uint64_t u, unsigned bit_size)
+{
+   assert(bit_size <= 64);
+   if (bit_size < 64)
+      assert(u < (1ull << bit_size));
+
+   return nir_const_value_for_raw_uint(u, bit_size);
+}
+
+static inline nir_const_value
+nir_const_value_for_bool(bool b, unsigned bit_size)
+{
+   /* Booleans are encoded as 0 for false and -1 (all ones) for true */
+   return nir_const_value_for_int(b ? -1 : 0, bit_size);
+}
+
+/* This one isn't inline because it requires half-float conversion */
+nir_const_value nir_const_value_for_float(double b, unsigned bit_size);
+
+static inline int64_t
+nir_const_value_as_int(nir_const_value value, unsigned bit_size)
+{
+   switch (bit_size) {
+   /* int1_t uses 0/-1 convention, so a set 1-bit value reads back as -1 */
+   case 1:  return -(int)value.b;
+   case 8:  return value.i8;
+   case 16: return value.i16;
+   case 32: return value.i32;
+   case 64: return value.i64;
+   default:
+      unreachable("Invalid bit size");
+   }
+}
+
+static inline uint64_t
+nir_const_value_as_uint(nir_const_value value, unsigned bit_size)
+{
+   switch (bit_size) {
+   case 1:  return value.b; /* not negated, unlike nir_const_value_as_int() */
+   case 8:  return value.u8;
+   case 16: return value.u16;
+   case 32: return value.u32;
+   case 64: return value.u64;
+   default:
+      unreachable("Invalid bit size");
+   }
+}
+
+static inline bool
+nir_const_value_as_bool(nir_const_value value, unsigned bit_size)
+{
+   const int64_t raw = nir_const_value_as_int(value, bit_size);
+
+   /* A well-formed boolean is 0 (false) or -1 (true) at every bit size */
+   assert(raw == 0 || raw == -1);
+
+   return raw != 0;
+}
+
+/* This one isn't inline because it requires half-float conversion */
+double nir_const_value_as_float(nir_const_value value, unsigned bit_size);
+
  typedef struct nir_constant {
     /**
      * Value of the constant.
@@ -148,7 +263,7 @@ typedef struct nir_constant {
      * by the type associated with the \c nir_variable.  Constants may be
      * scalars, vectors, or matrices.
      */
-   nir_const_value values[NIR_MAX_MATRIX_COLUMNS][NIR_MAX_VEC_COMPONENTS];
+   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
  
     /* we could get this from the var->type but makes clone *much* easier to
      * not have to care about the type.
@@ -213,7 +328,7 @@ typedef struct nir_variable {
         *
         * \sa nir_variable_mode
         */
-      nir_variable_mode mode;
+      nir_variable_mode mode:11;
  
        /**
         * Is the variable read-only?
@@ -227,6 +342,30 @@ typedef struct nir_variable {
        unsigned patch:1;
        unsigned invariant:1;
  
+      /**
+       * Precision qualifier.
+       *
+       * In desktop GLSL we do not care about precision qualifiers at all, in
+       * fact, the spec says that precision qualifiers are ignored.
+       *
+       * To make things easy, we make it so that this field is always
+       * GLSL_PRECISION_NONE on desktop shaders. This way all the variables
+       * have the same precision value and the checks we add in the compiler
+       * for this field will never break a desktop shader compile.
+       */
+      unsigned precision:2;
+
+      /**
+       * Can this variable be coalesced with another?
+       *
+       * This is set by nir_lower_io_to_temporaries to say that any
+       * copies involving this variable should stay put. Propagating it can
+       * duplicate the resulting load/store, which is not wanted, and may
+       * result in a load/store of the variable with an indirect offset which
+       * the backend may not be able to handle.
+       */
+      unsigned cannot_coalesce:1;
+
        /**
         * When separate shader programs are enabled, only input/outputs between
         * the stages of a multi-stage separate program can be safely removed
@@ -242,7 +381,7 @@ typedef struct nir_variable {
         *
         * \sa glsl_interp_mode
         */
-      unsigned interpolation:2;
+      unsigned interpolation:3;
  
        /**
         * If non-zero, then this variable may be packed along with other variables
@@ -278,6 +417,15 @@ typedef struct nir_variable {
         */
        unsigned explicit_binding:1;
  
+      /**
+       * Was the location explicitly set in the shader?
+       *
+       * If the location is explicitly set in the shader, it \b cannot be changed
+       * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
+       * no effect).
+       */
+      unsigned explicit_location:1;
+
        /**
         * Was a transfer feedback buffer set in the shader?
         */
@@ -294,99 +442,130 @@ typedef struct nir_variable {
        unsigned explicit_offset:1;
  
        /**
-       * \brief Layout qualifier for gl_FragDepth.
-       *
-       * This is not equal to \c ir_depth_layout_none if and only if this
-       * variable is \c gl_FragDepth and a layout qualifier is specified.
+       * Layout of the matrix.  Uses glsl_matrix_layout values.
         */
-      nir_depth_layout depth_layout;
+      unsigned matrix_layout:2;
  
        /**
-       * Storage location of the base of this variable
-       *
-       * The precise meaning of this field depends on the nature of the variable.
-       *
-       *   - Vertex shader input: one of the values from \c gl_vert_attrib.
-       *   - Vertex shader output: one of the values from \c gl_varying_slot.
-       *   - Geometry shader input: one of the values from \c gl_varying_slot.
-       *   - Geometry shader output: one of the values from \c gl_varying_slot.
-       *   - Fragment shader input: one of the values from \c gl_varying_slot.
-       *   - Fragment shader output: one of the values from \c gl_frag_result.
-       *   - Uniforms: Per-stage uniform slot number for default uniform block.
-       *   - Uniforms: Index within the uniform block definition for UBO members.
-       *   - Non-UBO Uniforms: uniform slot number.
-       *   - Other: This field is not currently used.
+       * Non-zero if this variable was created by lowering a named interface
+       * block.
+       */
+      unsigned from_named_ifc_block:1;
+
+      /**
+       * How the variable was declared.  See nir_var_declaration_type.
         *
-       * If the variable is a uniform, shader input, or shader output, and the
-       * slot has not been assigned, the value will be -1.
+       * This is used to detect variables generated by the compiler, so should
+       * not be visible via the API.
         */
-      int location;
+      unsigned how_declared:2;
  
        /**
-       * The actual location of the variable in the IR. Only valid for inputs
-       * and outputs.
+       * Is this variable per-view?  If so, we know it must be an array with
+       * size corresponding to the number of views.
         */
-      unsigned int driver_location;
+      unsigned per_view:1;
  
        /**
-       * Vertex stream output identifier.
+       * \brief Layout qualifier for gl_FragDepth.
         *
-       * For packed outputs, bit 31 is set and bits [2*i+1,2*i] indicate the
-       * stream of the i-th component.
+       * This is not equal to \c ir_depth_layout_none if and only if this
+       * variable is \c gl_FragDepth and a layout qualifier is specified.
         */
-      unsigned stream;
+      nir_depth_layout depth_layout:3;
  
        /**
-       * output index for dual source blending.
+       * Vertex stream output identifier.
+       *
+       * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i]
+       * indicate the stream of the i-th component.
         */
-      int index;
+      unsigned stream:9;
  
        /**
-       * Descriptor set binding for sampler or UBO.
+       * Access flags for memory variables (SSBO/global), image uniforms, and
+       * bindless images in uniforms/inputs/outputs.
         */
-      int descriptor_set;
+      enum gl_access_qualifier access:8;
  
        /**
-       * Initial binding point for a sampler or UBO.
-       *
-       * For array types, this represents the binding point for the first element.
+       * Descriptor set binding for sampler or UBO.
         */
-      int binding;
+      unsigned descriptor_set:5;
  
        /**
-       * Location an atomic counter or transform feedback is stored at.
+       * output index for dual source blending.
         */
-      unsigned offset;
+      unsigned index;
  
        /**
-       * Transform feedback buffer.
+       * Initial binding point for a sampler or UBO.
+       *
+       * For array types, this represents the binding point for the first element.
         */
-      unsigned xfb_buffer;
+      unsigned binding;
  
        /**
-       * Transform feedback stride.
+       * Storage location of the base of this variable
+       *
+       * The precise meaning of this field depends on the nature of the variable.
+       *
+       *   - Vertex shader input: one of the values from \c gl_vert_attrib.
+       *   - Vertex shader output: one of the values from \c gl_varying_slot.
+       *   - Geometry shader input: one of the values from \c gl_varying_slot.
+       *   - Geometry shader output: one of the values from \c gl_varying_slot.
+       *   - Fragment shader input: one of the values from \c gl_varying_slot.
+       *   - Fragment shader output: one of the values from \c gl_frag_result.
+       *   - Uniforms: Per-stage uniform slot number for default uniform block.
+       *   - Uniforms: Index within the uniform block definition for UBO members.
+       *   - Non-UBO Uniforms: uniform slot number.
+       *   - Other: This field is not currently used.
+       *
+       * If the variable is a uniform, shader input, or shader output, and the
+       * slot has not been assigned, the value will be -1.
         */
-      unsigned xfb_stride;
+      int location;
  
        /**
-       * How the variable was declared.  See nir_var_declaration_type.
-       *
-       * This is used to detect variables generated by the compiler, so should
-       * not be visible via the API.
+       * The actual location of the variable in the IR. Only valid for inputs,
+       * outputs, and uniforms (including samplers and images).
         */
-      unsigned how_declared:2;
+      unsigned driver_location;
  
        /**
-       * ARB_shader_image_load_store qualifiers.
+       * Location an atomic counter or transform feedback is stored at.
         */
-      struct {
-         enum gl_access_qualifier access;
+      unsigned offset;
  
-         /** Image internal format if specified explicitly, otherwise GL_NONE. */
-         GLenum format;
-      } image;
+      union {
+         struct {
+            /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */
+            enum pipe_format format;
+         } image;
+
+         struct {
+            /**
+             * Transform feedback buffer.
+             */
+            uint16_t buffer:2;
+
+            /**
+             * Transform feedback stride.
+             */
+            uint16_t stride;
+         } xfb;
+      };
     } data;
  
+   /**
+    * Identifier for this variable generated by nir_index_vars() that is unique
+    * among other variables in the same exec_list.
+    */
+   unsigned index;
+
+   /* Number of nir_variable_data members */
+   uint16_t num_members;
+
     /**
      * Built-in state that backs this uniform
      *
@@ -399,7 +578,7 @@ typedef struct nir_variable {
      * \c state_slots will be \c NULL.
      */
     /*@{*/
-   unsigned num_state_slots;    /**< Number of state slots used */
+   uint16_t num_state_slots;    /**< Number of state slots used */
     nir_state_slot *state_slots;  /**< State descriptors. */
     /*@}*/
  
@@ -412,6 +591,14 @@ typedef struct nir_variable {
      */
     nir_constant *constant_initializer;
  
+   /**
+    * Global variable assigned in the initializer of the variable
+    * This field should only be used temporarily by creators of NIR shaders
+    * and then lower_constant_initializers can be used to get rid of them.
+    * Most of the rest of NIR ignores this field or asserts that it's NULL.
+    */
+   struct nir_variable *pointer_initializer;
+
     /**
      * For variables that are in an interface block or are an instance of an
      * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
@@ -428,7 +615,6 @@ typedef struct nir_variable {
      * inputs each with their own layout specifier.  This is only allowed on
      * variables with a struct or array of array of struct type.
      */
-   unsigned num_members;
     struct nir_variable_data *members;
  } nir_variable;
  
@@ -556,6 +742,12 @@ typedef struct nir_ssa_def {
  
     /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
     uint8_t bit_size;
+
+   /**
+    * True if this SSA value may have different values in different SIMD
+    * invocations of the shader.  This is set by nir_divergence_analysis.
+    */
+   bool divergent;
  } nir_ssa_def;
  
  struct nir_src;
@@ -696,14 +888,12 @@ nir_src_is_const(nir_src src)
            src.ssa->parent_instr->type == nir_instr_type_load_const;
  }
  
-int64_t nir_src_as_int(nir_src src);
-uint64_t nir_src_as_uint(nir_src src);
-bool nir_src_as_bool(nir_src src);
-double nir_src_as_float(nir_src src);
-int64_t nir_src_comp_as_int(nir_src src, unsigned component);
-uint64_t nir_src_comp_as_uint(nir_src src, unsigned component);
-bool nir_src_comp_as_bool(nir_src src, unsigned component);
-double nir_src_comp_as_float(nir_src src, unsigned component);
+static inline bool
+nir_src_is_divergent(nir_src src)
+{
+   assert(src.is_ssa); /* the flag is read from the SSA def, so src must be SSA */
+   return src.ssa->divergent;
+}
  
  static inline unsigned
  nir_dest_bit_size(nir_dest dest)
@@ -717,6 +907,33 @@ nir_dest_num_components(nir_dest dest)
     return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components;
  }
  
+static inline bool
+nir_dest_is_divergent(nir_dest dest)
+{
+   assert(dest.is_ssa); /* the flag is read from the embedded SSA def, so dest must be SSA */
+   return dest.ssa.divergent;
+}
+
+/* Are all components the same, i.e. .xxxx?  Compares swiz[1..nr_comp-1]
+ * against swiz[0], so an nr_comp of 0 or 1 trivially yields true.
+ */
+static inline bool
+nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
+{
+   for (unsigned i = 1; i < nr_comp; i++)
+      if (swiz[i] != swiz[0])
+         return false;
+   return true;
+}
+
+/* Are all components sequential, i.e. .yzw?  Requires swiz[i] == swiz[0] + i
+ * for every i in 1..nr_comp-1, so an nr_comp of 0 or 1 trivially yields true.
+ */
+static inline bool
+nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
+{
+   for (unsigned i = 1; i < nr_comp; i++)
+      if (swiz[i] != (swiz[0] + i))
+         return false;
+   return true;
+}
+
  void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
  void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
  
@@ -772,13 +989,15 @@ typedef struct {
   * The values in this enum are carefully chosen so that the sized type is
   * just the unsized type OR the number of bits.
   */
-typedef enum {
+typedef enum PACKED {
     nir_type_invalid = 0, /* Not a valid type */
     nir_type_int =       2,
     nir_type_uint =      4,
     nir_type_bool =      6,
     nir_type_float =     128,
     nir_type_bool1 =     1  | nir_type_bool,
+   nir_type_bool8 =     8  | nir_type_bool,
+   nir_type_bool16 =    16 | nir_type_bool,
     nir_type_bool32 =    32 | nir_type_bool,
     nir_type_int1 =      1  | nir_type_int,
     nir_type_int8 =      8  | nir_type_int,
@@ -804,10 +1023,10 @@ nir_alu_type_get_type_size(nir_alu_type type)
     return type & NIR_ALU_TYPE_SIZE_MASK;
  }
  
-static inline unsigned
+static inline nir_alu_type
  nir_alu_type_get_base_type(nir_alu_type type)
  {
-   return type & NIR_ALU_TYPE_BASE_TYPE_MASK;
+   return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK);
  }
  
  static inline nir_alu_type
@@ -848,9 +1067,21 @@ nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
     case GLSL_TYPE_DOUBLE:
        return nir_type_float64;
        break;
-   default:
-      unreachable("unknown type");
+
+   case GLSL_TYPE_SAMPLER:
+   case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_STRUCT:
+   case GLSL_TYPE_INTERFACE:
+   case GLSL_TYPE_ARRAY:
+   case GLSL_TYPE_VOID:
+   case GLSL_TYPE_SUBROUTINE:
+   case GLSL_TYPE_FUNCTION:
+   case GLSL_TYPE_ERROR:
+      return nir_type_invalid;
     }
+
+   unreachable("unknown type");
  }
  
  static inline nir_alu_type
@@ -862,15 +1093,133 @@ nir_get_nir_type_for_glsl_type(const struct glsl_type *type)
  nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst,
                                nir_rounding_mode rnd);
  
+static inline nir_op
+nir_op_vec(unsigned components)
+{
+   switch (components) {
+   case  1: return nir_op_mov; /* one component maps to a plain mov, not a vecN op */
+   case  2: return nir_op_vec2;
+   case  3: return nir_op_vec3;
+   case  4: return nir_op_vec4;
+   case  8: return nir_op_vec8;
+   case 16: return nir_op_vec16;
+   default: unreachable("bad component count");
+   }
+}
+
+static inline bool
+nir_op_is_vec(nir_op op)
+{
+   switch (op) {
+   case nir_op_mov: /* counted too: nir_op_vec(1) returns mov */
+   case nir_op_vec2:
+   case nir_op_vec3:
+   case nir_op_vec4:
+   case nir_op_vec8:
+   case nir_op_vec16:
+      return true;
+   default:
+      return false;
+   }
+}
+
+static inline bool
+nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
+{
+   /* Test the flag that belongs to bit_size; sizes other than 16/32/64
+    * have no flag and report false.
+    */
+   switch (bit_size) {
+   case 16:
+      return (execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) != 0;
+   case 32:
+      return (execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) != 0;
+   case 64:
+      return (execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64) != 0;
+   default:
+      return false;
+   }
+}
+
+static inline bool
+nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
+{
+   /* Test the flush-to-zero flag that belongs to bit_size; sizes other
+    * than 16/32/64 have no flag and report false.
+    */
+   switch (bit_size) {
+   case 16:
+      return (execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) != 0;
+   case 32:
+      return (execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) != 0;
+   case 64:
+      return (execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64) != 0;
+   default:
+      return false;
+   }
+}
+
+static inline bool
+nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size)
+{
+   /* Test the denorm-preserve flag that belongs to bit_size; sizes other
+    * than 16/32/64 have no flag and report false.
+    */
+   switch (bit_size) {
+   case 16:
+      return (execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) != 0;
+   case 32:
+      return (execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) != 0;
+   case 64:
+      return (execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64) != 0;
+   default:
+      return false;
+   }
+}
+
+static inline bool
+nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size)
+{
+   /* Test the RTE flag that belongs to bit_size; sizes other than
+    * 16/32/64 have no flag and report false.
+    */
+   switch (bit_size) {
+   case 16:
+      return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) != 0;
+   case 32:
+      return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) != 0;
+   case 64:
+      return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64) != 0;
+   default:
+      return false;
+   }
+}
+
+static inline bool
+nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size)
+{
+   /* Test the RTZ flag that belongs to bit_size; sizes other than
+    * 16/32/64 have no flag and report false.
+    */
+   switch (bit_size) {
+   case 16:
+      return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) != 0;
+   case 32:
+      return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) != 0;
+   case 64:
+      return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64) != 0;
+   default:
+      return false;
+   }
+}
+
+static inline bool
+nir_has_any_rounding_mode_rtz(unsigned execution_mode)
+{
+   /* True when the RTZ flag of at least one float size is set */
+   return (execution_mode & (FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 |
+                             FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32 |
+                             FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64)) != 0;
+}
+
+static inline bool
+nir_has_any_rounding_mode_rtne(unsigned execution_mode)
+{
+   /* True when the RTE flag of at least one float size is set */
+   return (execution_mode & (FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 |
+                             FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32 |
+                             FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64)) != 0;
+}
+
+static inline nir_rounding_mode
+nir_get_rounding_mode_from_float_controls(unsigned execution_mode,
+                                          nir_alu_type type)
+{
+   if (nir_alu_type_get_base_type(type) != nir_type_float) /* non-float types always get undef */
+      return nir_rounding_mode_undef;
+
+   unsigned bit_size = nir_alu_type_get_type_size(type);
+
+   if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) /* RTZ is tested first, so it wins if both are set */
+      return nir_rounding_mode_rtz;
+   if (nir_is_rounding_mode_rtne(execution_mode, bit_size))
+      return nir_rounding_mode_rtne;
+   return nir_rounding_mode_undef; /* neither mode is requested for this bit size */
+}
+
+static inline bool
+nir_has_any_rounding_mode_enabled(unsigned execution_mode)
+{
+   /* Either rounding mode, requested for any float size, counts */
+   if (nir_has_any_rounding_mode_rtne(execution_mode))
+      return true;
+   return nir_has_any_rounding_mode_rtz(execution_mode);
+}
+
  typedef enum {
-   NIR_OP_IS_COMMUTATIVE = (1 << 0),
+   /**
+    * Operation where the first two sources are commutative.
+    *
+    * For 2-source operations, this is just mathematical commutativity.  Some
+    * 3-source operations, like ffma, are only commutative in the first two
+    * sources.
+    */
+   NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0),
     NIR_OP_IS_ASSOCIATIVE = (1 << 1),
  } nir_op_algebraic_property;
  
  typedef struct {
     const char *name;
  
-   unsigned num_inputs;
+   uint8_t num_inputs;
  
     /**
      * The number of components in the output
@@ -889,7 +1238,7 @@ typedef struct {
      * though output_size is zero; in that case, the inputs with a zero
      * size act per-component, while the inputs with non-zero size don't.
      */
-   unsigned output_size;
+   uint8_t output_size;
  
     /**
      * The type of vector that the instruction outputs. Note that the
@@ -901,7 +1250,7 @@ typedef struct {
     /**
      * The number of components in each input
      */
-   unsigned input_sizes[NIR_MAX_VEC_COMPONENTS];
+   uint8_t input_sizes[NIR_MAX_VEC_COMPONENTS];
  
     /**
      * The type of vector that each input takes. Note that negate and
@@ -930,7 +1279,14 @@ typedef struct nir_alu_instr {
      * it must ensure that the resulting value is bit-for-bit identical to the
      * original.
      */
-   bool exact;
+   bool exact:1;
+
+   /**
+    * Indicates that this instruction does not cause wrapping to occur, in the
+    * form of overflow or underflow.
+    */
+   bool no_signed_wrap:1;
+   bool no_unsigned_wrap:1;
  
     nir_alu_dest dest;
     nir_alu_src src[];
@@ -965,26 +1321,43 @@ nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
     return read_mask;
  }
  
-/*
- * For instructions whose destinations are SSA, get the number of channels
- * used for a source
+/**
+ * Get the number of channels used for a source
   */
  static inline unsigned
  nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
  {
-   assert(instr->dest.dest.is_ssa);
-
     if (nir_op_infos[instr->op].input_sizes[src] > 0)
        return nir_op_infos[instr->op].input_sizes[src];
  
-   return instr->dest.dest.ssa.num_components;
+   return nir_dest_num_components(instr->dest.dest);
  }
  
-bool nir_const_value_negative_equal(const nir_const_value *c1,
-                                    const nir_const_value *c2,
-                                    unsigned components,
-                                    nir_alu_type base_type,
-                                    unsigned bits);
+static inline bool
+nir_alu_instr_is_comparison(const nir_alu_instr *instr)
+{
+   switch (instr->op) {
+   case nir_op_flt:
+   case nir_op_fge:
+   case nir_op_feq:
+   case nir_op_fne:
+   case nir_op_ilt:
+   case nir_op_ult:
+   case nir_op_ige:
+   case nir_op_uge:
+   case nir_op_ieq:
+   case nir_op_ine:
+   case nir_op_i2b1: /* NOTE(review): i2b1/f2b1/inot are not compares by name; presumably listed because they yield booleans - confirm */
+   case nir_op_f2b1:
+   case nir_op_inot:
+      return true;
+   default:
+      return false;
+   }
+}
+
+bool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2,
+                                    nir_alu_type full_type);
  
  bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
                          unsigned src1, unsigned src2);
@@ -1041,20 +1414,7 @@ typedef struct {
     nir_dest dest;
  } nir_deref_instr;
  
-NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr,
-                type, nir_instr_type_deref)
-
-static inline nir_deref_instr *
-nir_src_as_deref(nir_src src)
-{
-   if (!src.is_ssa)
-      return NULL;
-
-   if (src.ssa->parent_instr->type != nir_instr_type_deref)
-      return NULL;
-
-   return nir_instr_as_deref(src.ssa->parent_instr);
-}
+static inline nir_deref_instr *nir_src_as_deref(nir_src src);
  
  static inline nir_deref_instr *
  nir_deref_instr_parent(const nir_deref_instr *instr)
@@ -1079,6 +1439,8 @@ nir_deref_instr_get_variable(const nir_deref_instr *instr)
  }
  
  bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
+bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr);
+bool nir_deref_instr_has_complex_use(nir_deref_instr *instr);
  
  bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);
  
@@ -1151,6 +1513,26 @@ nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i)
     return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
  }
  
+typedef enum {
+   /* Memory ordering. */
+   NIR_MEMORY_ACQUIRE        = 1 << 0,
+   NIR_MEMORY_RELEASE        = 1 << 1,
+   NIR_MEMORY_ACQ_REL        = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE, /* both ordering bits, not a separate bit */
+
+   /* Memory visibility operations. */
+   NIR_MEMORY_MAKE_AVAILABLE = 1 << 2,
+   NIR_MEMORY_MAKE_VISIBLE   = 1 << 3,
+} nir_memory_semantics;
+
+typedef enum {
+   NIR_SCOPE_NONE, /* implicitly 0; the following enumerators count up from here */
+   NIR_SCOPE_INVOCATION,
+   NIR_SCOPE_SUBGROUP,
+   NIR_SCOPE_WORKGROUP,
+   NIR_SCOPE_QUEUE_FAMILY,
+   NIR_SCOPE_DEVICE, /* NOTE(review): order looks narrowest-to-widest - confirm before comparing values */
+} nir_scope;
+
  /**
   * \name NIR intrinsics semantic flags
   *
@@ -1190,80 +1572,80 @@ typedef enum {
     /**
      * For store instructions, a writemask for the store.
      */
-   NIR_INTRINSIC_WRMASK = 2,
+   NIR_INTRINSIC_WRMASK,
  
     /**
      * The stream-id for GS emit_vertex/end_primitive intrinsics.
      */
-   NIR_INTRINSIC_STREAM_ID = 3,
+   NIR_INTRINSIC_STREAM_ID,
  
     /**
      * The clip-plane id for load_user_clip_plane intrinsic.
      */
-   NIR_INTRINSIC_UCP_ID = 4,
+   NIR_INTRINSIC_UCP_ID,
  
     /**
      * The amount of data, starting from BASE, that this instruction may
      * access.  This is used to provide bounds if the offset is not constant.
      */
-   NIR_INTRINSIC_RANGE = 5,
+   NIR_INTRINSIC_RANGE,
  
     /**
      * The Vulkan descriptor set for vulkan_resource_index intrinsic.
      */
-   NIR_INTRINSIC_DESC_SET = 6,
+   NIR_INTRINSIC_DESC_SET,
  
     /**
      * The Vulkan descriptor set binding for vulkan_resource_index intrinsic.
      */
-   NIR_INTRINSIC_BINDING = 7,
+   NIR_INTRINSIC_BINDING,
  
     /**
      * Component offset.
      */
-   NIR_INTRINSIC_COMPONENT = 8,
+   NIR_INTRINSIC_COMPONENT,
  
     /**
      * Interpolation mode (only meaningful for FS inputs).
      */
-   NIR_INTRINSIC_INTERP_MODE = 9,
+   NIR_INTRINSIC_INTERP_MODE,
  
     /**
      * A binary nir_op to use when performing a reduction or scan operation
      */
-   NIR_INTRINSIC_REDUCTION_OP = 10,
+   NIR_INTRINSIC_REDUCTION_OP,
  
     /**
      * Cluster size for reduction operations
      */
-   NIR_INTRINSIC_CLUSTER_SIZE = 11,
+   NIR_INTRINSIC_CLUSTER_SIZE,
  
     /**
      * Parameter index for a load_param intrinsic
      */
-   NIR_INTRINSIC_PARAM_IDX = 12,
+   NIR_INTRINSIC_PARAM_IDX,
  
     /**
      * Image dimensionality for image intrinsics
      *
      * One of GLSL_SAMPLER_DIM_*
      */
-   NIR_INTRINSIC_IMAGE_DIM = 13,
+   NIR_INTRINSIC_IMAGE_DIM,
  
     /**
      * Non-zero if we are accessing an array image
      */
-   NIR_INTRINSIC_IMAGE_ARRAY = 14,
+   NIR_INTRINSIC_IMAGE_ARRAY,
  
     /**
      * Image format for image intrinsics
      */
-   NIR_INTRINSIC_FORMAT = 15,
+   NIR_INTRINSIC_FORMAT,
  
     /**
      * Access qualifiers for image and memory access intrinsics
      */
-   NIR_INTRINSIC_ACCESS = 16,
+   NIR_INTRINSIC_ACCESS,
  
     /**
      * Alignment for offsets and addresses
@@ -1274,13 +1656,51 @@ typedef enum {
      *
      *                (X - align_offset) % align_mul == 0
      */
-   NIR_INTRINSIC_ALIGN_MUL = 17,
-   NIR_INTRINSIC_ALIGN_OFFSET = 18,
+   NIR_INTRINSIC_ALIGN_MUL,
+   NIR_INTRINSIC_ALIGN_OFFSET,
  
     /**
      * The Vulkan descriptor type for a vulkan_resource_[re]index intrinsic.
      */
-   NIR_INTRINSIC_DESC_TYPE = 19,
+   NIR_INTRINSIC_DESC_TYPE,
+
+   /**
+    * The nir_alu_type of a uniform/input/output
+    */
+   NIR_INTRINSIC_TYPE,
+
+   /**
+    * The swizzle mask for the instructions
+    * SwizzleInvocationsAMD and SwizzleInvocationsMaskedAMD
+    */
+   NIR_INTRINSIC_SWIZZLE_MASK,
+
+   /* Separate source/dest access flags for copies */
+   NIR_INTRINSIC_SRC_ACCESS,
+   NIR_INTRINSIC_DST_ACCESS,
+
+   /* Driver location for nir_load_patch_location_ir3 */
+   NIR_INTRINSIC_DRIVER_LOCATION,
+
+   /**
+    * Mask of nir_memory_semantics, includes ordering and visibility.
+    */
+   NIR_INTRINSIC_MEMORY_SEMANTICS,
+
+   /**
+    * Mask of nir_variable_modes affected by the memory operation.
+    */
+   NIR_INTRINSIC_MEMORY_MODES,
+
+   /**
+    * Value of nir_scope.
+    */
+   NIR_INTRINSIC_MEMORY_SCOPE,
+
+   /**
+    * Value of nir_scope.
+    */
+   NIR_INTRINSIC_EXECUTION_SCOPE,
  
     NIR_INTRINSIC_NUM_INDEX_FLAGS,
  
@@ -1291,7 +1711,7 @@ typedef enum {
  typedef struct {
     const char *name;
  
-   unsigned num_srcs; /** < number of register/SSA inputs */
+   uint8_t num_srcs; /** < number of register/SSA inputs */
  
     /** number of components of each input register
      *
@@ -1300,7 +1720,7 @@ typedef struct {
      * intrinsic consumes however many components are provided and it is not
      * validated at all.
      */
-   int src_components[NIR_INTRINSIC_MAX_INPUTS];
+   int8_t src_components[NIR_INTRINSIC_MAX_INPUTS];
  
     bool has_dest;
  
@@ -1309,16 +1729,16 @@ typedef struct {
      * If this value is 0, the number of components is given by the
      * num_components field of nir_intrinsic_instr.
      */
-   unsigned dest_components;
+   uint8_t dest_components;
  
     /** bitfield of legal bit sizes */
-   unsigned dest_bit_sizes;
+   uint8_t dest_bit_sizes;
  
     /** the number of constant indices used by the intrinsic */
-   unsigned num_indices;
+   uint8_t num_indices;
  
     /** indicates the usage of intr->const_index[n] */
-   unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
+   uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
  
     /** semantic flags for calls to this intrinsic */
     nir_intrinsic_semantic_flag flags;
@@ -1327,7 +1747,7 @@ typedef struct {
  extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
  
  static inline unsigned
-nir_intrinsic_src_components(nir_intrinsic_instr *intr, unsigned srcn)
+nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
  {
     const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
     assert(srcn < info->num_srcs);
@@ -1351,6 +1771,33 @@ nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
        return intr->num_components;
  }
  
+/**
+ * Copy the const_index[] values from src to dst, remapping each index kind
+ * through both intrinsics' index_map[] so slot order need not match.
+ */
+static inline void
+nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
+{
+   if (src->intrinsic == dst->intrinsic) { /* same intrinsic: copy the whole array */
+      memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
+      return;
+   }
+
+   const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
+   const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];
+
+   for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
+      if (src_info->index_map[i] == 0) /* 0 means src has no index of kind i */
+         continue;
+
+      /* require that dst instruction also uses the same const_index[]: */
+      assert(dst_info->index_map[i] > 0);
+
+      dst->const_index[dst_info->index_map[i] - 1] = /* map entries are slot + 1 */
+            src->const_index[src_info->index_map[i] - 1];
+   }
+}
+
  #define INTRINSIC_IDX_ACCESSORS(name, flag, type)                             \
  static inline type                                                            \
  nir_intrinsic_##name(const nir_intrinsic_instr *instr)                        \
@@ -1382,10 +1829,19 @@ INTRINSIC_IDX_ACCESSORS(param_idx, PARAM_IDX, unsigned)
  INTRINSIC_IDX_ACCESSORS(image_dim, IMAGE_DIM, enum glsl_sampler_dim)
  INTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool)
  INTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier)
-INTRINSIC_IDX_ACCESSORS(format, FORMAT, unsigned)
+INTRINSIC_IDX_ACCESSORS(src_access, SRC_ACCESS, enum gl_access_qualifier)
+INTRINSIC_IDX_ACCESSORS(dst_access, DST_ACCESS, enum gl_access_qualifier)
+INTRINSIC_IDX_ACCESSORS(format, FORMAT, enum pipe_format)
  INTRINSIC_IDX_ACCESSORS(align_mul, ALIGN_MUL, unsigned)
  INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned)
  INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned)
+INTRINSIC_IDX_ACCESSORS(type, TYPE, nir_alu_type)
+INTRINSIC_IDX_ACCESSORS(swizzle_mask, SWIZZLE_MASK, unsigned)
+INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned)
+INTRINSIC_IDX_ACCESSORS(memory_semantics, MEMORY_SEMANTICS, nir_memory_semantics)
+INTRINSIC_IDX_ACCESSORS(memory_modes, MEMORY_MODES, nir_variable_mode)
+INTRINSIC_IDX_ACCESSORS(memory_scope, MEMORY_SCOPE, nir_scope)
+INTRINSIC_IDX_ACCESSORS(execution_scope, EXECUTION_SCOPE, nir_scope)
  
  static inline void
  nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
@@ -1413,10 +1869,31 @@ nir_intrinsic_align(const nir_intrinsic_instr *intrin)
     return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
  }
  
+unsigned
+nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr);
+
  /* Converts a image_deref_* intrinsic into a image_* one */
  void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr,
                                   nir_ssa_def *handle, bool bindless);
  
+/* Determine if an intrinsic can be arbitrarily reordered and eliminated. */
+static inline bool
+nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
+{
+   if (instr->intrinsic == nir_intrinsic_load_deref ||
+       instr->intrinsic == nir_intrinsic_load_ssbo ||
+       instr->intrinsic == nir_intrinsic_bindless_image_load ||
+       instr->intrinsic == nir_intrinsic_image_deref_load ||
+       instr->intrinsic == nir_intrinsic_image_load) {
+      return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER; /* per-instruction access flag decides */
+   } else { /* every other intrinsic: decided by its static info flags */
+      const nir_intrinsic_info *info =
+         &nir_intrinsic_infos[instr->intrinsic];
+      return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && /* both flags must be set */
+             (info->flags & NIR_INTRINSIC_CAN_REORDER);
+   }
+}
+
  /**
   * \group texture information
   *
@@ -1457,7 +1934,8 @@ typedef enum {
     nir_texop_txl,                /**< Texture look-up with explicit LOD */
     nir_texop_txd,                /**< Texture look-up with partial derivatives */
     nir_texop_txf,                /**< Texel fetch with explicit LOD */
-   nir_texop_txf_ms,                /**< Multisample texture fetch */
+   nir_texop_txf_ms,             /**< Multisample texture fetch */
+   nir_texop_txf_ms_fb,          /**< Multisample texture fetch from framebuffer */
     nir_texop_txf_ms_mcs,         /**< Multisample compression value fetch */
     nir_texop_txs,                /**< Texture size */
     nir_texop_lod,                /**< Texture lod query */
@@ -1467,6 +1945,9 @@ typedef enum {
     nir_texop_samples_identical,  /**< Query whether all samples are definitely
                                    * identical.
                                    */
+   nir_texop_tex_prefetch,       /**< Regular texture look-up, eligible for pre-dispatch */
+   nir_texop_fragment_fetch,     /**< Multisample fragment color texture fetch */
+   nir_texop_fragment_mask_fetch,/**< Multisample fragment mask texture fetch */
  } nir_texop;
  
  typedef struct {
@@ -1506,9 +1987,6 @@ typedef struct {
      */
     unsigned texture_index;
  
-   /** The size of the texture array or 0 if it's not an array */
-   unsigned texture_array_size;
-
     /** The sampler index
      *
      * The following operations do not require a sampler and, as such, this
@@ -1527,6 +2005,30 @@ typedef struct {
     unsigned sampler_index;
  } nir_tex_instr;
  
+/*
+ * Returns true if the texture operation requires a sampler as a general rule,
+ * see the documentation of sampler_index.
+ *
+ * Note that a specific hw/driver backend could still require a sampler
+ * object/configuration packet in any case, for some other reason.
+ */
+static inline bool
+nir_tex_instr_need_sampler(const nir_tex_instr *instr)
+{
+   switch (instr->op) {
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+   case nir_texop_txs:
+   case nir_texop_lod:
+   case nir_texop_query_levels:
+   case nir_texop_texture_samples:
+   case nir_texop_samples_identical:
+      return false; /* the ops listed above are the only ones reported as sampler-less */
+   default:
+      return true; /* every op not listed above is reported as needing a sampler */
+   }
+}
+
  static inline unsigned
  nir_tex_instr_dest_size(const nir_tex_instr *instr)
  {
@@ -1563,6 +2065,7 @@ nir_tex_instr_dest_size(const nir_tex_instr *instr)
     case nir_texop_texture_samples:
     case nir_texop_query_levels:
     case nir_texop_samples_identical:
+   case nir_texop_fragment_mask_fetch:
        return 1;
  
     default:
@@ -1592,6 +2095,7 @@ nir_tex_instr_is_query(const nir_tex_instr *instr)
     case nir_texop_txd:
     case nir_texop_txf:
     case nir_texop_txf_ms:
+   case nir_texop_txf_ms_fb:
     case nir_texop_tg4:
        return false;
     default:
@@ -1600,23 +2104,12 @@ nir_tex_instr_is_query(const nir_tex_instr *instr)
  }
  
  static inline bool
-nir_alu_instr_is_comparison(const nir_alu_instr *instr)
+nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
  {
     switch (instr->op) {
-   case nir_op_flt:
-   case nir_op_fge:
-   case nir_op_feq:
-   case nir_op_fne:
-   case nir_op_ilt:
-   case nir_op_ult:
-   case nir_op_ige:
-   case nir_op_uge:
-   case nir_op_ieq:
-   case nir_op_ine:
-   case nir_op_i2b1:
-   case nir_op_f2b1:
-   case nir_op_inot:
-   case nir_op_fnot:
+   case nir_texop_tex:
+   case nir_texop_txb:
+   case nir_texop_lod:
        return true;
     default:
        return false;
@@ -1631,6 +2124,7 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
        switch (instr->op) {
        case nir_texop_txf:
        case nir_texop_txf_ms:
+      case nir_texop_txf_ms_fb:
        case nir_texop_txf_ms_mcs:
        case nir_texop_samples_identical:
           return nir_type_int;
@@ -1652,19 +2146,30 @@ nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
     case nir_tex_src_projector:
     case nir_tex_src_comparator:
     case nir_tex_src_bias:
+   case nir_tex_src_min_lod:
     case nir_tex_src_ddx:
     case nir_tex_src_ddy:
        return nir_type_float;
  
     case nir_tex_src_offset:
     case nir_tex_src_ms_index:
+   case nir_tex_src_plane:
+      return nir_type_int;
+
+   case nir_tex_src_ms_mcs:
+   case nir_tex_src_texture_deref:
+   case nir_tex_src_sampler_deref:
     case nir_tex_src_texture_offset:
     case nir_tex_src_sampler_offset:
-      return nir_type_int;
+   case nir_tex_src_texture_handle:
+   case nir_tex_src_sampler_handle:
+      return nir_type_uint;
  
-   default:
-      unreachable("Invalid texture source type");
+   case nir_num_tex_src_types:
+      unreachable("nir_num_tex_src_types is not a valid source type");
     }
+
+   unreachable("Invalid texture source type");
  }
  
  static inline unsigned
@@ -1726,14 +2231,27 @@ typedef struct {
     nir_const_value value[];
  } nir_load_const_instr;
  
-#define nir_const_load_to_arr(arr, l, m) \
-{ \
-   nir_const_value_to_array(arr, l->value, l->def.num_components, m); \
-} while (false);
-
  typedef enum {
+   /** Return from a function
+    *
+    * This instruction is a classic function return.  It jumps to
+    * nir_function_impl::end_block.  No return value is provided in this
+    * instruction.  Instead, the function is expected to write any return
+    * data to a deref passed in from the caller.
+    */
     nir_jump_return,
+
+   /** Break out of the inner-most loop
+    *
+    * This has the same semantics as C's "break" statement.
+    */
     nir_jump_break,
+
+   /** Jump back to the top of the inner-most loop
+    *
+    * This has the same semantics as C's "continue" statement assuming that a
+    * NIR loop is implemented as "while (1) { body }".
+    */
     nir_jump_continue,
  } nir_jump_type;
  
@@ -1793,6 +2311,8 @@ typedef struct {
  
  NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr,
                  type, nir_instr_type_alu)
+NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr,
+                type, nir_instr_type_deref)
  NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr,
                  type, nir_instr_type_call)
  NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr,
@@ -1811,6 +2331,114 @@ NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
                  nir_parallel_copy_instr, instr,
                  type, nir_instr_type_parallel_copy)
  
+/* Defines nir_src_comp_as_<suffix>(src, comp) and the scalar-only nir_src_as_<suffix>(src). */
+#define NIR_DEFINE_SRC_AS_CONST(type, suffix)               \
+static inline type                                          \
+nir_src_comp_as_##suffix(nir_src src, unsigned comp)        \
+{                                                           \
+   assert(nir_src_is_const(src));                           \
+   nir_load_const_instr *load =                             \
+      nir_instr_as_load_const(src.ssa->parent_instr);       \
+   assert(comp < load->def.num_components);                 \
+   return nir_const_value_as_##suffix(load->value[comp],    \
+                                      load->def.bit_size);  \
+}                                                           \
+                                                            \
+static inline type                                          \
+nir_src_as_##suffix(nir_src src)                            \
+{                                                           \
+   assert(nir_src_num_components(src) == 1);                \
+   return nir_src_comp_as_##suffix(src, 0);                 \
+}
+
+NIR_DEFINE_SRC_AS_CONST(int64_t,    int)
+NIR_DEFINE_SRC_AS_CONST(uint64_t,   uint)
+NIR_DEFINE_SRC_AS_CONST(bool,       bool)
+NIR_DEFINE_SRC_AS_CONST(double,     float)
+
+#undef NIR_DEFINE_SRC_AS_CONST
+
+/* Names a single component of an SSA def. */
+typedef struct {
+   nir_ssa_def *def; /* the SSA def being referred to */
+   unsigned comp;    /* component index; helpers assert comp < def->num_components */
+} nir_ssa_scalar;
+/* True if the scalar's def is produced by a load_const instruction. */
+static inline bool
+nir_ssa_scalar_is_const(nir_ssa_scalar s)
+{
+   return s.def->parent_instr->type == nir_instr_type_load_const;
+}
+/* Returns the nir_const_value for component s.comp of a load_const-produced def. */
+static inline nir_const_value
+nir_ssa_scalar_as_const_value(nir_ssa_scalar s)
+{
+   assert(s.comp < s.def->num_components);
+   nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr); /* NOTE(review): no is_const check here; callers presumably test nir_ssa_scalar_is_const() first */
+   return load->value[s.comp];
+}
+/* Defines nir_ssa_scalar_as_<suffix>(s): the scalar's constant read via nir_const_value_as_<suffix>(). */
+#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix)                     \
+static inline type                                                   \
+nir_ssa_scalar_as_##suffix(nir_ssa_scalar s)                         \
+{                                                                    \
+   return nir_const_value_as_##suffix(                               \
+      nir_ssa_scalar_as_const_value(s), s.def->bit_size);            \
+}
+
+NIR_DEFINE_SCALAR_AS_CONST(int64_t,    int)
+NIR_DEFINE_SCALAR_AS_CONST(uint64_t,   uint)
+NIR_DEFINE_SCALAR_AS_CONST(bool,       bool)
+NIR_DEFINE_SCALAR_AS_CONST(double,     float)
+
+#undef NIR_DEFINE_SCALAR_AS_CONST
+/* True if the scalar's def is produced by an ALU instruction. */
+static inline bool
+nir_ssa_scalar_is_alu(nir_ssa_scalar s)
+{
+   return s.def->parent_instr->type == nir_instr_type_alu;
+}
+/* Returns the opcode of the ALU instruction producing the scalar's def. */
+static inline nir_op
+nir_ssa_scalar_alu_op(nir_ssa_scalar s)
+{
+   return nir_instr_as_alu(s.def->parent_instr)->op; /* NOTE(review): no is_alu check here; callers presumably test nir_ssa_scalar_is_alu() first */
+}
+/* Returns the scalar read by source alu_src_idx of the ALU instruction that produces s. */
+static inline nir_ssa_scalar
+nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx)
+{
+   nir_ssa_scalar out = { NULL, 0 };
+
+   nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
+   assert(alu_src_idx < nir_op_infos[alu->op].num_inputs);
+
+   /* Our component must be written */
+   assert(s.comp < s.def->num_components);
+   assert(alu->dest.write_mask & (1u << s.comp));
+
+   assert(alu->src[alu_src_idx].src.is_ssa);
+   out.def = alu->src[alu_src_idx].src.ssa;
+
+   if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) {
+      /* The ALU src is unsized so the source component follows the
+       * destination component.
+       */
+      out.comp = alu->src[alu_src_idx].swizzle[s.comp];
+   } else {
+      /* This is a sized source so all source components work together to
+       * produce all the destination components.  Since we need to return a
+       * scalar, this only works if the source is a scalar.
+       */
+      assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1);
+      out.comp = alu->src[alu_src_idx].swizzle[0];
+   }
+   assert(out.comp < out.def->num_components);
+
+   return out;
+}
+
+
  /*
   * Control flow
   *
@@ -1876,13 +2504,20 @@ typedef struct nir_block {
      * dom_pre_index and dom_post_index for this block, which makes testing if
      * a given block is dominated by another block an O(1) operation.
      */
-   unsigned dom_pre_index, dom_post_index;
+   int16_t dom_pre_index, dom_post_index;
  
     /* live in and out for this block; used for liveness analysis */
     BITSET_WORD *live_in;
     BITSET_WORD *live_out;
  } nir_block;
  
+static inline bool
+nir_block_is_reachable(nir_block *b)
+{
+   /* A dom_post_index of -1 is treated as "unreachable"; see also nir_block_dominates */
+   return b->dom_post_index != -1;
+}
+
  static inline nir_instr *
  nir_block_first_instr(nir_block *block)
  {
@@ -2008,11 +2643,71 @@ typedef struct {
   */
  typedef enum {
     nir_metadata_none = 0x0,
+
+   /** Indicates that nir_block::index values are valid.
+    *
+    * The start block has index 0 and they increase through a natural walk of
+    * the CFG.  nir_function_impl::num_blocks is the number of blocks and
+    * every block index is in the range [0, nir_function_impl::num_blocks].
+    *
+    * A pass can preserve this metadata type if it doesn't touch the CFG.
+    */
     nir_metadata_block_index = 0x1,
+
+   /** Indicates that block dominance information is valid
+    *
+    * This includes:
+    *
+    *   - nir_block::num_dom_children
+    *   - nir_block::dom_children
+    *   - nir_block::dom_frontier
+    *   - nir_block::dom_pre_index
+    *   - nir_block::dom_post_index
+    *
+    * A pass can preserve this metadata type if it doesn't touch the CFG.
+    */
     nir_metadata_dominance = 0x2,
+
+   /** Indicates that SSA def data-flow liveness information is valid
+    *
+    * This includes:
+    *
+    *   - nir_ssa_def::live_index
+    *   - nir_block::live_in
+    *   - nir_block::live_out
+    *
+    * A pass can preserve this metadata type if it never adds or removes any
+    * SSA defs (most passes shouldn't preserve this metadata type).
+    */
     nir_metadata_live_ssa_defs = 0x4,
+
+   /** A dummy metadata value to track when a pass forgot to call
+    * nir_metadata_preserve.
+    *
+    * A pass should always clear this value even if it doesn't make any
+    * progress to indicate that it thought about preserving metadata.
+    */
     nir_metadata_not_properly_reset = 0x8,
+
+   /** Indicates that loop analysis information is valid.
+    *
+    * This includes everything pointed to by nir_loop::info.
+    *
+    * A pass can preserve this metadata type if it is guaranteed to not affect
+    * any loop metadata.  However, since loop metadata includes things like
+    * loop counts which depend on arithmetic in the loop, this is very hard to
+    * determine.  Most passes shouldn't preserve this metadata type.
+    */
     nir_metadata_loop_analysis = 0x10,
+
+   /** All metadata
+    *
+    * This includes all nir_metadata flags except not_properly_reset.  Passes
+    * which do not change the shader in any way should call
+    *
+    *    nir_metadata_preserve(impl, nir_metadata_all);
+    */
+   nir_metadata_all = ~nir_metadata_not_properly_reset,
  } nir_metadata;
  
  typedef struct {
@@ -2138,6 +2833,21 @@ nir_loop_last_block(nir_loop *loop)
     return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
  }
  
+/**
+ * Return true if this list of cf_nodes contains a single empty block.
+ */
+static inline bool
+nir_cf_list_is_empty_block(struct exec_list *cf_list)
+{
+   if (exec_list_is_singular(cf_list)) { /* presumably: the list has exactly one node */
+      struct exec_node *head = exec_list_get_head(cf_list);
+      nir_block *block = /* NOTE(review): the lone node is cast to a block without a type test here */
+         nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
+      return exec_list_is_empty(&block->instr_list); /* empty means no instructions */
+   }
+   return false; /* list is not a single node */
+}
+
  typedef struct {
     uint8_t num_components;
     uint8_t bit_size;
@@ -2177,6 +2887,8 @@ typedef enum {
     nir_lower_minmax64 = (1 << 10),
     nir_lower_shift64 = (1 << 11),
     nir_lower_imul_2x32_64 = (1 << 12),
+   nir_lower_extract64 = (1 << 13),
+   nir_lower_ufind_msb64 = (1 << 14),
  } nir_lower_int64_options;
  
  typedef enum {
@@ -2189,9 +2901,18 @@ typedef enum {
     nir_lower_dfract = (1 << 6),
     nir_lower_dround_even = (1 << 7),
     nir_lower_dmod = (1 << 8),
-   nir_lower_fp64_full_software = (1 << 9),
+   nir_lower_dsub = (1 << 9),
+   nir_lower_ddiv = (1 << 10),
+   nir_lower_fp64_full_software = (1 << 11),
  } nir_lower_doubles_options;
  
+typedef enum {
+   nir_divergence_single_prim_per_subgroup = (1 << 0),
+   nir_divergence_single_patch_per_tcs_subgroup = (1 << 1),
+   nir_divergence_single_patch_per_tes_subgroup = (1 << 2),
+   nir_divergence_view_index_uniform = (1 << 3),
+} nir_divergence_options;
+
  typedef struct nir_shader_compiler_options {
     bool lower_fdiv;
     bool lower_ffma;
@@ -2203,23 +2924,22 @@ typedef struct nir_shader_compiler_options {
     bool lower_fpow;
     bool lower_fsat;
     bool lower_fsqrt;
-   bool lower_fmod16;
-   bool lower_fmod32;
-   bool lower_fmod64;
+   bool lower_sincos;
+   bool lower_fmod;
     /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
     bool lower_bitfield_extract;
-   /** Lowers ibitfield_extract/ubitfield_extract to bfm, compares, shifts. */
+   /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */
     bool lower_bitfield_extract_to_shifts;
     /** Lowers bitfield_insert to bfi/bfm */
     bool lower_bitfield_insert;
-   /** Lowers bitfield_insert to bfm, compares, and shifts. */
+   /** Lowers bitfield_insert to compares and shifts. */
     bool lower_bitfield_insert_to_shifts;
+   /** Lowers bitfield_insert to bfm/bitfield_select. */
+   bool lower_bitfield_insert_to_bitfield_select;
     /** Lowers bitfield_reverse to shifts. */
     bool lower_bitfield_reverse;
     /** Lowers bit_count to shifts. */
     bool lower_bit_count;
-   /** Lowers bfm to shifts and subtracts. */
-   bool lower_bfm;
     /** Lowers ifind_msb to compare and ufind_msb */
     bool lower_ifind_msb;
     /** Lowers find_lsb to ufind_msb and logic ops */
@@ -2236,15 +2956,27 @@ typedef struct nir_shader_compiler_options {
     /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
     bool lower_scmp;
  
+   /* lower fall_equalN/fany_nequalN (ex:fany_nequal4 to sne+fdot4+fsat) */
+   bool lower_vector_cmp;
+
     /** enables rules to lower idiv by power-of-two: */
     bool lower_idiv;
  
+   /** enable rules to avoid bit ops */
+   bool lower_bitops;
+
     /** enables rules to lower isign to imin+imax */
     bool lower_isign;
  
     /** enables rules to lower fsign to fsub and flt */
     bool lower_fsign;
  
+   /* lower fdph to fdot4 */
+   bool lower_fdph;
+
+   /** lower fdot to fmul and fsum/fadd. */
+   bool lower_fdot;
+
     /* Does the native fdot instruction replicate its result for four
      * components?  If so, then opt_algebraic_late will turn all fdotN
      * instructions into fdot_replicatedN instructions.
@@ -2275,18 +3007,14 @@ typedef struct nir_shader_compiler_options {
     bool lower_unpack_unorm_4x8;
     bool lower_unpack_snorm_4x8;
  
+   bool lower_pack_split;
+
     bool lower_extract_byte;
     bool lower_extract_word;
  
     bool lower_all_io_to_temps;
     bool lower_all_io_to_elements;
  
-   /**
-    * Does the driver support real 32-bit integers?  (Otherwise, integers
-    * are simulated by floats.)
-    */
-   bool native_integers;
-
     /* Indicates that the driver only has zero-based vertex id */
     bool vertex_id_zero_based;
  
@@ -2326,9 +3054,65 @@ typedef struct nir_shader_compiler_options {
     /* Set if nir_lower_wpos_ytransform() should also invert gl_PointCoord. */
     bool lower_wpos_pntc;
  
+   /**
+    * Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be
+    * lowered to simple arithmetic.
+    *
+    * If this flag is set, the lowering will be applied to all bit-sizes of
+    * these instructions.
+    *
+    * \sa ::lower_hadd64
+    */
     bool lower_hadd;
+
+   /**
+    * Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions
+    * should be lowered to simple arithmetic.
+    *
+    * If this flag is set, the lowering will be applied to only 64-bit
+    * versions of these instructions.
+    *
+    * \sa ::lower_hadd
+    */
+   bool lower_hadd64;
+
+   /**
+    * Set if nir_op_add_sat and nir_op_usub_sat should be lowered to simple
+    * arithmetic.
+    *
+    * If this flag is set, the lowering will be applied to all bit-sizes of
+    * these instructions.
+    *
+    * \sa ::lower_usub_sat64
+    */
     bool lower_add_sat;
  
+   /**
+    * Set if only 64-bit nir_op_usub_sat should be lowered to simple
+    * arithmetic.
+    *
+    * \sa ::lower_add_sat
+    */
+   bool lower_usub_sat64;
+
+   /**
+    * Should IO be re-vectorized?  Some scalar ISAs still operate on vec4's
+    * for IO purposes and would prefer loads/stores be vectorized.
+    */
+   bool vectorize_io;
+   bool lower_to_scalar;
+
+   /**
+    * Whether nir_opt_vectorize should only create 16-bit 2D vectors.
+    */
+   bool vectorize_vec2_16bit;
+
+   /**
+    * Should the linker unify inputs_read/outputs_written between adjacent
+    * shader stages which are linked into a single program?
+    */
+   bool unify_interfaces;
+
     /**
      * Should nir_lower_io() create load_interpolated_input intrinsics?
      *
@@ -2340,6 +3124,49 @@ typedef struct nir_shader_compiler_options {
     /* Lowers when 32x32->64 bit multiplication is not supported */
     bool lower_mul_2x32_64;
  
+   /* Lowers when rotate instruction is not supported */
+   bool lower_rotate;
+
+   /**
+    * Backend supports imul24, and would like to use it (when possible)
+    * for address/offset calculation.  If true, driver should call
+    * nir_lower_amul().  (If not set, amul will automatically be lowered
+    * to imul.)
+    */
+   bool has_imul24;
+
+   /** Backend supports umul24; if not set, umul24 will automatically be lowered
+    * to imul with masked inputs. */
+   bool has_umul24;
+
+   /** Backend supports umad24; if not set, umad24 will automatically be lowered
+    * to imul with masked inputs and iadd. */
+   bool has_umad24;
+
+   /* Whether to generate only scoped_barrier intrinsics instead of the set of
+    * memory and control barrier intrinsics based on GLSL.
+    */
+   bool use_scoped_barrier;
+
+   /**
+    * Is this the Intel vec4 backend?
+    *
+    * Used to inhibit algebraic optimizations that are known to be harmful on
+    * the Intel vec4 backend.  This is generally applicable to any
+    * optimization that might cause more immediate values to be used in
+    * 3-source (e.g., ffma and flrp) instructions.
+    */
+   bool intel_vec4;
+
+   /** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */
+   bool lower_bfe_with_two_constants;
+
+   /** Whether 8-bit ALU is supported. */
+   bool support_8bit_alu;
+
+   /** Whether 16-bit ALU is supported. */
+   bool support_16bit_alu;
+
     unsigned max_unroll_iterations;
  
     nir_lower_int64_options lower_int64_options;
@@ -2467,6 +3294,8 @@ nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
  void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...);
  /** dirties all but the preserved metadata */
  void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
+/** Preserves all metadata for the given shader */
+void nir_shader_preserve_all_metadata(nir_shader *shader);
  
  /** creates an instruction with default swizzle/writemask/etc. with NULL registers */
  nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
@@ -2741,6 +3570,8 @@ nir_instr_remove(nir_instr *instr)
  
  /** @} */
  
+nir_ssa_def *nir_instr_ssa_def(nir_instr *instr);
+
  typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
  typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
  typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
@@ -2748,6 +3579,9 @@ bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
                           void *state);
  bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
  bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
+bool nir_foreach_phi_src_leaving_block(nir_block *instr,
+                                       nir_foreach_src_cb cb,
+                                       void *state);
  
  nir_const_value *nir_src_as_const_value(nir_src src);
  
@@ -2762,9 +3596,11 @@ nir_src_as_ ## name (nir_src src)                                       \
  NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu)
  NIR_SRC_AS_(intrinsic, nir_intrinsic_instr,
              nir_instr_type_intrinsic, nir_instr_as_intrinsic)
+NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref)
  
  bool nir_src_is_dynamically_uniform(nir_src src);
  bool nir_srcs_equal(nir_src src1, nir_src src2);
+bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2);
  void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
  void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
  void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
@@ -2855,18 +3691,25 @@ unsigned nir_index_instrs(nir_function_impl *impl);
  
  void nir_index_blocks(nir_function_impl *impl);
  
+void nir_index_vars(nir_shader *shader, nir_function_impl *impl, nir_variable_mode modes);
+
  void nir_print_shader(nir_shader *shader, FILE *fp);
  void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors);
  void nir_print_instr(const nir_instr *instr, FILE *fp);
  void nir_print_deref(const nir_deref_instr *deref, FILE *fp);
  
+/** Shallow clone of a single ALU instruction. */
+nir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig);
+
  nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
  nir_function_impl *nir_function_impl_clone(nir_shader *shader,
                                             const nir_function_impl *fi);
  nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
  nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader);
  
-nir_shader *nir_shader_serialize_deserialize(void *mem_ctx, nir_shader *s);
+void nir_shader_replace(nir_shader *dest, nir_shader *src);
+
+void nir_shader_serialize_deserialize(nir_shader *s);
  
  #ifndef NDEBUG
  void nir_validate_shader(nir_shader *shader, const char *when);
@@ -2938,12 +3781,10 @@ static inline bool should_print_nir(void) { return false; }
     nir_validate_shader(nir, "after " #pass);                         \
     if (should_clone_nir()) {                                         \
        nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
-      ralloc_free(nir);                                              \
-      nir = clone;                                                   \
+      nir_shader_replace(nir, clone);                                \
     }                                                                 \
     if (should_serialize_deserialize_nir()) {                         \
-      void *mem_ctx = ralloc_parent(nir);                            \
-      nir = nir_shader_serialize_deserialize(mem_ctx, nir);          \
+      nir_shader_serialize_deserialize(nir);                         \
     }                                                                 \
  } while (0)
  
@@ -2969,11 +3810,64 @@ static inline bool should_print_nir(void) { return false; }
  
  #define NIR_SKIP(name) should_skip_nir(#name)
  
+/** An instruction filtering callback
+ *
+ * Returns true if the instruction should be processed and false otherwise.
+ */
+typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);
+
+/** A simple instruction lowering callback
+ *
+ * Many instruction lowering passes can be written as a simple function which
+ * takes an instruction as its input and returns a sequence of instructions
+ * that implement the consumed instruction.  This function type represents
+ * such a lowering function.  When called, a function with this prototype
+ * should either return NULL indicating that no lowering needs to be done or
+ * emit a sequence of instructions using the provided builder (whose cursor
+ * will already be placed after the instruction to be lowered) and return the
+ * resulting nir_ssa_def.
+ */
+typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *,
+                                           nir_instr *, void *);
+
+/**
+ * Special return value for nir_lower_instr_cb when some progress occurred
+ * (like changing an input to the instr) that didn't result in a replacement
+ * SSA def being generated.
+ */
+#define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1)
+
+/** Iterate over all the instructions in a nir_function_impl and lower them
+ *  using the provided callbacks
+ *
+ * This function implements the guts of a standard lowering pass for you.  It
+ * iterates over all of the instructions in a nir_function_impl and calls the
+ * filter callback on each one.  If the filter callback returns true, it then
+ * calls the lowering callback on the instruction.  (Splitting it this way
+ * allows us to avoid some save/restore work for instructions we know won't be
+ * lowered.)  If the instruction is dead after the lowering is complete, it
+ * will be removed.  If new instructions are added, the lowering callback will
+ * also be called on them in case multiple lowerings are required.
+ *
+ * The metadata for the nir_function_impl will also be updated.  If any blocks
+ * are added (they cannot be removed), dominance and block indices will be
+ * invalidated.
+ */
+bool nir_function_impl_lower_instructions(nir_function_impl *impl,
+                                          nir_instr_filter_cb filter,
+                                          nir_lower_instr_cb lower,
+                                          void *cb_data);
+bool nir_shader_lower_instructions(nir_shader *shader,
+                                   nir_instr_filter_cb filter,
+                                   nir_lower_instr_cb lower,
+                                   void *cb_data);
+
  void nir_calc_dominance_impl(nir_function_impl *impl);
  void nir_calc_dominance(nir_shader *shader);
  
  nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
  bool nir_block_dominates(nir_block *parent, nir_block *child);
+bool nir_block_is_unreachable(nir_block *block);
  
  void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
  void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
@@ -3034,14 +3928,20 @@ bool nir_lower_vars_to_scratch(nir_shader *shader,
                                 int size_threshold,
                                 glsl_type_size_align_func size_align);
  
+void nir_lower_clip_halfz(nir_shader *shader);
+
  void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
  
+void nir_gather_ssa_types(nir_function_impl *impl,
+                          BITSET_WORD *float_types,
+                          BITSET_WORD *int_types);
+
  void nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                                int (*type_size)(const struct glsl_type *, bool));
  
  /* Some helpers to do very simple linking */
  bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
-bool nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
+bool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode,
                                 uint64_t *used_by_other_stage,
                                 uint64_t *used_by_other_stage_patches);
  void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
@@ -3049,7 +3949,29 @@ void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
  void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer);
  bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer);
  
+bool nir_lower_amul(nir_shader *shader,
+                    int (*type_size)(const struct glsl_type *, bool));
+
+void nir_assign_io_var_locations(struct exec_list *var_list,
+                                 unsigned *size,
+                                 gl_shader_stage stage);
+
+typedef struct {
+   uint8_t num_linked_io_vars;
+   uint8_t num_linked_patch_io_vars;
+} nir_linked_io_var_info;
+
+nir_linked_io_var_info
+nir_assign_linked_io_var_locations(nir_shader *producer,
+                                   nir_shader *consumer);
+
  typedef enum {
+   /* If set, this causes all 64-bit IO operations to be lowered on-the-fly
+    * to 32-bit operations.  This is only valid for nir_var_shader_in/out
+    * modes.
+    */
+   nir_lower_io_lower_64bit_to_32 = (1 << 0),
+
     /* If set, this forces all non-flat fragment shader inputs to be
      * interpolated as if with the "sample" qualifier.  This requires
      * nir_shader_compiler_options::use_interpolated_input_intrinsics.
@@ -3061,6 +3983,13 @@ bool nir_lower_io(nir_shader *shader,
                    int (*type_size)(const struct glsl_type *, bool),
                    nir_lower_io_options);
  
+bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode);
+
+bool
+nir_lower_vars_to_explicit_types(nir_shader *shader,
+                                 nir_variable_mode modes,
+                                 glsl_type_size_align_func type_info);
+
  typedef enum {
     /**
      * An address format which is a simple 32-bit global GPU address.
@@ -3087,16 +4016,39 @@ typedef enum {
      * component is a buffer index and the second is an offset.
      */
     nir_address_format_32bit_index_offset,
+
+   /**
+    * An address format which is comprised of a vec3 where the first two
+    * components specify the buffer and the third is an offset.
+    */
+   nir_address_format_vec2_index_32bit_offset,
+
+   /**
+    * An address format which is a simple 32-bit offset.
+    */
+   nir_address_format_32bit_offset,
+
+   /**
+    * An address format representing a purely logical addressing model.  In
+    * this model, all deref chains must be complete from the dereference
+    * operation to the variable.  Cast derefs are not allowed.  These
+    * addresses will be 32-bit scalars but the format is immaterial because
+    * you can always chase the chain.
+    */
+   nir_address_format_logical,
  } nir_address_format;
  
  static inline unsigned
  nir_address_format_bit_size(nir_address_format addr_format)
  {
     switch (addr_format) {
-   case nir_address_format_32bit_global:           return 32;
-   case nir_address_format_64bit_global:           return 64;
-   case nir_address_format_64bit_bounded_global:   return 32;
-   case nir_address_format_32bit_index_offset:     return 32;
+   case nir_address_format_32bit_global:              return 32;
+   case nir_address_format_64bit_global:              return 64;
+   case nir_address_format_64bit_bounded_global:      return 32;
+   case nir_address_format_32bit_index_offset:        return 32;
+   case nir_address_format_vec2_index_32bit_offset:   return 32;
+   case nir_address_format_32bit_offset:              return 32;
+   case nir_address_format_logical:                   return 32;
     }
     unreachable("Invalid address format");
  }
@@ -3105,10 +4057,13 @@ static inline unsigned
  nir_address_format_num_components(nir_address_format addr_format)
  {
     switch (addr_format) {
-   case nir_address_format_32bit_global:           return 1;
-   case nir_address_format_64bit_global:           return 1;
-   case nir_address_format_64bit_bounded_global:   return 4;
-   case nir_address_format_32bit_index_offset:     return 2;
+   case nir_address_format_32bit_global:              return 1;
+   case nir_address_format_64bit_global:              return 1;
+   case nir_address_format_64bit_bounded_global:      return 4;
+   case nir_address_format_32bit_index_offset:        return 2;
+   case nir_address_format_vec2_index_32bit_offset:   return 3;
+   case nir_address_format_32bit_offset:              return 1;
+   case nir_address_format_logical:                   return 1;
     }
     unreachable("Invalid address format");
  }
@@ -3122,6 +4077,14 @@ nir_address_format_to_glsl_type(nir_address_format addr_format)
                             nir_address_format_num_components(addr_format));
  }
  
+const nir_const_value *nir_address_format_null_value(nir_address_format addr_format);
+
+nir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
+                                nir_address_format addr_format);
+
+nir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
+                                 nir_address_format addr_format);
+
  nir_ssa_def * nir_explicit_io_address_from_deref(struct nir_builder *b,
                                                   nir_deref_instr *deref,
                                                   nir_ssa_def *base_addr,
@@ -3146,19 +4109,26 @@ bool nir_lower_vars_to_ssa(nir_shader *shader);
  
  bool nir_remove_dead_derefs(nir_shader *shader);
  bool nir_remove_dead_derefs_impl(nir_function_impl *impl);
-bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes);
-bool nir_lower_constant_initializers(nir_shader *shader,
+bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes,
+                               bool (*can_remove_var)(nir_variable *var));
+bool nir_lower_variable_initializers(nir_shader *shader,
                                       nir_variable_mode modes);
  
-bool nir_move_load_const(nir_shader *shader);
  bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
  bool nir_lower_vec_to_movs(nir_shader *shader);
  void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
-                          bool alpha_to_one);
+                          bool alpha_to_one,
+                          const gl_state_index16 *alpha_ref_state_tokens);
  bool nir_lower_alu(nir_shader *shader);
-bool nir_lower_alu_to_scalar(nir_shader *shader);
+
+bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask,
+                    bool always_precise, bool have_ffma);
+
+bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
+bool nir_lower_bool_to_bitsize(nir_shader *shader);
  bool nir_lower_bool_to_float(nir_shader *shader);
  bool nir_lower_bool_to_int32(nir_shader *shader);
+bool nir_lower_int_to_float(nir_shader *shader);
  bool nir_lower_load_const_to_scalar(nir_shader *shader);
  bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
  bool nir_lower_phis_to_scalar(nir_shader *shader);
@@ -3169,6 +4139,8 @@ void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask);
  void nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask);
  bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask);
  
+bool nir_lower_fragcolor(nir_shader *shader);
+void nir_lower_fragcoord_wtrans(nir_shader *shader);
  void nir_lower_viewport_transform(nir_shader *shader);
  bool nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier);
  
@@ -3181,7 +4153,10 @@ typedef struct nir_lower_subgroups_options {
     bool lower_subgroup_masks:1;
     bool lower_shuffle:1;
     bool lower_shuffle_to_32bit:1;
+   bool lower_shuffle_to_swizzle_amd:1;
     bool lower_quad:1;
+   bool lower_quad_broadcast_dynamic:1;
+   bool lower_quad_broadcast_dynamic_to_const:1;
  } nir_lower_subgroups_options;
  
  bool nir_lower_subgroups(nir_shader *shader,
@@ -3274,6 +4249,12 @@ typedef struct nir_lower_tex_options {
      */
     unsigned lower_srgb;
  
+   /**
+    * If true, lower nir_texop_tex on shaders that don't support implicit
+    * LODs to nir_texop_txl.
+    */
+   bool lower_tex_without_implicit_lod;
+
     /**
      * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
      */
@@ -3327,6 +4308,12 @@ typedef struct nir_lower_tex_options {
      */
     bool lower_txd_clamp_if_sampler_index_not_lt_16;
  
+   /**
+    * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with
+    * 0-lod followed by a nir_ishr.
+    */
+   bool lower_txs_lod;
+
     /**
      * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's
      * mixed-up tg4 locations.
@@ -3354,18 +4341,45 @@ enum nir_lower_non_uniform_access_type {
  bool nir_lower_non_uniform_access(nir_shader *shader,
                                    enum nir_lower_non_uniform_access_type);
  
-bool nir_lower_idiv(nir_shader *shader);
+enum nir_lower_idiv_path {
+   /* This path is based on NV50LegalizeSSA::handleDIV(). It is the faster of
+    * the two but it is not exact in some cases (for example, 1091317713u /
+    * 1034u gives 5209173 instead of 1055432) */
+   nir_lower_idiv_fast,
+   /* This path is based on AMDGPUTargetLowering::LowerUDIVREM() and
+    * AMDGPUTargetLowering::LowerSDIVREM(). It requires more instructions than
+    * the nv50 path and many of them are integer multiplications, so it is
+    * probably slower. It should always return the correct result, though. */
+   nir_lower_idiv_precise,
+};
+
+bool nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path);
+
+bool nir_lower_input_attachments(nir_shader *shader, bool use_fragcoord_sysval);
  
-bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables, bool use_vars);
-bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
+bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables,
+                       bool use_vars,
+                       bool use_clipdist_array,
+                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
+bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables,
+                       bool use_clipdist_array,
+                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
+bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables,
+                       bool use_clipdist_array);
  bool nir_lower_clip_cull_distance_arrays(nir_shader *nir);
+bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable);
+
+void nir_lower_point_size_mov(nir_shader *shader,
+                              const gl_state_index16 *pointsize_state_tokens);
  
  bool nir_lower_frexp(nir_shader *nir);
  
-void nir_lower_two_sided_color(nir_shader *shader);
+void nir_lower_two_sided_color(nir_shader *shader, bool face_sysval);
  
  bool nir_lower_clamp_color_outputs(nir_shader *shader);
  
+bool nir_lower_flatshade(nir_shader *shader);
+
  void nir_lower_passthrough_edgeflags(nir_shader *shader);
  bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
                                const gl_state_index16 *uniform_state_tokens);
@@ -3382,6 +4396,10 @@ bool nir_lower_wpos_ytransform(nir_shader *shader,
                                 const nir_lower_wpos_ytransform_options *options);
  bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading);
  
+bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
+
+bool nir_lower_fb_read(nir_shader *shader);
+
  typedef struct nir_lower_drawpixels_options {
     gl_state_index16 texcoord_state_tokens[STATE_LENGTH];
     gl_state_index16 scale_state_tokens[STATE_LENGTH];
@@ -3402,7 +4420,7 @@ typedef struct nir_lower_bitmap_options {
  
  void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);
  
-bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset);
+bool nir_lower_atomics_to_ssbo(nir_shader *shader);
  
  typedef enum  {
     nir_lower_int_source_mods = 1 << 0,
@@ -3414,7 +4432,7 @@ typedef enum  {
  
  bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options);
  
-bool nir_lower_gs_intrinsics(nir_shader *shader);
+bool nir_lower_gs_intrinsics(nir_shader *shader, bool per_stream);
  
  typedef unsigned (*nir_lower_bit_size_callback)(const nir_alu_instr *, void *);
  
@@ -3430,6 +4448,25 @@ bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
                         nir_lower_doubles_options options);
  bool nir_lower_pack(nir_shader *shader);
  
+void nir_lower_mediump_outputs(nir_shader *nir);
+
+bool nir_lower_point_size(nir_shader *shader, float min, float max);
+
+typedef enum {
+   nir_lower_interpolation_at_sample = (1 << 1),
+   nir_lower_interpolation_at_offset = (1 << 2),
+   nir_lower_interpolation_centroid  = (1 << 3),
+   nir_lower_interpolation_pixel     = (1 << 4),
+   nir_lower_interpolation_sample    = (1 << 5),
+} nir_lower_interpolation_options;
+
+bool nir_lower_interpolation(nir_shader *shader,
+                             nir_lower_interpolation_options options);
+
+bool nir_lower_discard_to_demote(nir_shader *shader);
+
+bool nir_lower_memory_model(nir_shader *shader);
+
  bool nir_normalize_cubemap_coords(nir_shader *shader);
  
  void nir_live_ssa_defs_impl(nir_function_impl *impl);
@@ -3443,6 +4480,8 @@ bool nir_repair_ssa_impl(nir_function_impl *impl);
  bool nir_repair_ssa(nir_shader *shader);
  
  void nir_convert_loop_to_lcssa(nir_loop *loop);
+bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
+void nir_divergence_analysis(nir_shader *shader, nir_divergence_options options);
  
  /* If phi_webs_only is true, only convert SSA values involved in phi nodes to
   * registers.  If false, convert all values (even those not involved in a phi
@@ -3454,13 +4493,32 @@ bool nir_lower_phis_to_regs_block(nir_block *block);
  bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
  bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);
  
+bool nir_lower_samplers(nir_shader *shader);
+bool nir_lower_ssbo(nir_shader *shader);
+
+/* This is here for unit tests. */
+bool nir_opt_comparison_pre_impl(nir_function_impl *impl);
+
  bool nir_opt_comparison_pre(nir_shader *shader);
  
+bool nir_opt_access(nir_shader *shader);
  bool nir_opt_algebraic(nir_shader *shader);
  bool nir_opt_algebraic_before_ffma(nir_shader *shader);
  bool nir_opt_algebraic_late(nir_shader *shader);
+bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
  bool nir_opt_constant_folding(nir_shader *shader);
  
+/* Try to combine a and b into a.  Return true if combination was possible,
+ * which will result in b being removed by the pass.  Return false if
+ * combination wasn't possible.
+ */
+typedef bool (*nir_combine_memory_barrier_cb)(
+   nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data);
+
+bool nir_opt_combine_memory_barriers(nir_shader *shader,
+                                     nir_combine_memory_barrier_cb combine_cb,
+                                     void *data);
+
  bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);
  
  bool nir_copy_prop(nir_shader *shader);
@@ -3494,14 +4552,27 @@ bool nir_opt_large_constants(nir_shader *shader,
  
  bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
  
-bool nir_opt_move_comparisons(nir_shader *shader);
+typedef enum {
+    nir_move_const_undef = (1 << 0),
+    nir_move_load_ubo    = (1 << 1),
+    nir_move_load_input  = (1 << 2),
+    nir_move_comparisons = (1 << 3),
+    nir_move_copies      = (1 << 4),
+} nir_move_options;
+
+bool nir_can_move_instr(nir_instr *instr, nir_move_options options);
  
-bool nir_opt_move_load_ubo(nir_shader *shader);
+bool nir_opt_sink(nir_shader *shader, nir_move_options options);
+
+bool nir_opt_move(nir_shader *shader, nir_move_options options);
  
  bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
                               bool indirect_load_ok, bool expensive_alu_ok);
  
+bool nir_opt_rematerialize_compares(nir_shader *shader);
+
  bool nir_opt_remove_phis(nir_shader *shader);
+bool nir_opt_remove_phis_block(nir_block *block);
  
  bool nir_opt_shrink_load(nir_shader *shader);
  
@@ -3509,8 +4580,18 @@ bool nir_opt_trivial_continues(nir_shader *shader);
  
  bool nir_opt_undef(nir_shader *shader);
  
+bool nir_opt_vectorize(nir_shader *shader);
+
  bool nir_opt_conditional_discard(nir_shader *shader);
  
+typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size,
+                                              unsigned num_components, unsigned high_offset,
+                                              nir_intrinsic_instr *low, nir_intrinsic_instr *high);
+
+bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes,
+                                  nir_should_vectorize_mem_func callback,
+                                  nir_variable_mode robust_modes);
+
  void nir_strip(nir_shader *shader);
  
  void nir_sweep(nir_shader *shader);
@@ -3522,6 +4603,46 @@ uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot);
  nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
  gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
  
+static inline bool
+nir_variable_is_in_ubo(const nir_variable *var)
+{
+   return (var->data.mode == nir_var_mem_ubo &&
+           var->interface_type != NULL);
+}
+
+static inline bool
+nir_variable_is_in_ssbo(const nir_variable *var)
+{
+   return (var->data.mode == nir_var_mem_ssbo &&
+           var->interface_type != NULL);
+}
+
+static inline bool
+nir_variable_is_in_block(const nir_variable *var)
+{
+   return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var);
+}
+
+typedef struct nir_unsigned_upper_bound_config {
+   unsigned min_subgroup_size;
+   unsigned max_subgroup_size;
+   unsigned max_work_group_invocations;
+   unsigned max_work_group_count[3];
+   unsigned max_work_group_size[3];
+
+   uint32_t vertex_attrib_max[32];
+} nir_unsigned_upper_bound_config;
+
+uint32_t
+nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
+                         nir_ssa_scalar scalar,
+                         const nir_unsigned_upper_bound_config *config);
+
+bool
+nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
+                            nir_ssa_scalar ssa, unsigned const_val,
+                            const nir_unsigned_upper_bound_config *config);
+
  #ifdef __cplusplus
  } /* extern "C" */
  #endif