nir: add loop unroll support for complex wrapper loops

[mesa.git] / src / compiler / nir / nir.h
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h

index 92ab3a699ccc18640d399bd529b862c02cf72ea7..599f469a714f255d82b1a5f4b2525848ee440bcf 100644 (file)
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -57,6 +57,8 @@ extern "C" {
  
  #define NIR_FALSE 0u
  #define NIR_TRUE (~0u)
+#define NIR_MAX_VEC_COMPONENTS 4
+typedef uint8_t nir_component_mask_t;
  
  /** Defines a cast function
   *
@@ -115,16 +117,16 @@ typedef enum {
  } nir_rounding_mode;
  
  typedef union {
-   float f32[4];
-   double f64[4];
-   int8_t i8[4];
-   uint8_t u8[4];
-   int16_t i16[4];
-   uint16_t u16[4];
-   int32_t i32[4];
-   uint32_t u32[4];
-   int64_t i64[4];
-   uint64_t u64[4];
+   float f32[NIR_MAX_VEC_COMPONENTS];
+   double f64[NIR_MAX_VEC_COMPONENTS];
+   int8_t i8[NIR_MAX_VEC_COMPONENTS];
+   uint8_t u8[NIR_MAX_VEC_COMPONENTS];
+   int16_t i16[NIR_MAX_VEC_COMPONENTS];
+   uint16_t u16[NIR_MAX_VEC_COMPONENTS];
+   int32_t i32[NIR_MAX_VEC_COMPONENTS];
+   uint32_t u32[NIR_MAX_VEC_COMPONENTS];
+   int64_t i64[NIR_MAX_VEC_COMPONENTS];
+   uint64_t u64[NIR_MAX_VEC_COMPONENTS];
  } nir_const_value;
  
  typedef struct nir_constant {
@@ -135,7 +137,7 @@ typedef struct nir_constant {
      * by the type associated with the \c nir_variable.  Constants may be
      * scalars, vectors, or matrices.
      */
-   nir_const_value values[4];
+   nir_const_value values[NIR_MAX_VEC_COMPONENTS];
  
     /* we could get this from the var->type but makes clone *much* easier to
      * not have to care about the type.
@@ -160,6 +162,22 @@ typedef enum {
      nir_depth_layout_unchanged
  } nir_depth_layout;
  
+/**
+ * Enum keeping track of how a variable was declared.
+ */
+typedef enum {
+   /**
+    * Normal declaration.
+    */
+   nir_var_declared_normally = 0,
+
+   /**
+    * Variable is implicitly generated by the compiler and should not be
+    * visible via the API.
+    */
+   nir_var_hidden,
+} nir_var_declaration_type;
+
  /**
   * Either a uniform, global variable, shader input, or shader output. Based on
   * ir_variable - it should be easy to translate between the two.
@@ -257,6 +275,21 @@ typedef struct nir_variable {
         */
        unsigned explicit_binding:1;
  
+      /**
+       * Was a transform feedback buffer set in the shader?
+       */
+      unsigned explicit_xfb_buffer:1;
+
+      /**
+       * Was a transform feedback stride set in the shader?
+       */
+      unsigned explicit_xfb_stride:1;
+
+      /**
+       * Was an explicit offset set in the shader?
+       */
+      unsigned explicit_offset:1;
+
        /**
         * \brief Layout qualifier for gl_FragDepth.
         *
@@ -318,19 +351,33 @@ typedef struct nir_variable {
        int binding;
  
        /**
-       * Location an atomic counter is stored at.
+       * Location an atomic counter or transform feedback is stored at.
         */
        unsigned offset;
  
+      /**
+       * Transform feedback buffer.
+       */
+      unsigned xfb_buffer;
+
+      /**
+       * Transform feedback stride.
+       */
+      unsigned xfb_stride;
+
+      /**
+       * How the variable was declared.  See nir_var_declaration_type.
+       *
+       * This is used to detect variables generated by the compiler, which
+       * should not be visible via the API.
+       */
+      unsigned how_declared:2;
+
        /**
         * ARB_shader_image_load_store qualifiers.
         */
        struct {
-         bool read_only; /**< "readonly" qualifier. */
-         bool write_only; /**< "writeonly" qualifier. */
-         bool coherent;
-         bool _volatile;
-         bool restrict_flag;
+         enum gl_access_qualifier access;
  
           /** Image internal format if specified explicitly, otherwise GL_NONE. */
           GLenum format;
@@ -697,7 +744,7 @@ typedef struct {
      * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and
      * a swizzle of {2, x, 1, 0} where x means "don't care."
      */
-   uint8_t swizzle[4];
+   uint8_t swizzle[NIR_MAX_VEC_COMPONENTS];
  } nir_alu_src;
  
  typedef struct {
@@ -712,7 +759,7 @@ typedef struct {
  
     bool saturate;
  
-   unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
+   unsigned write_mask : NIR_MAX_VEC_COMPONENTS; /* ignored if dest.is_ssa is true */
  } nir_alu_dest;
  
  typedef enum {
@@ -841,14 +888,14 @@ typedef struct {
     /**
      * The number of components in each input
      */
-   unsigned input_sizes[4];
+   unsigned input_sizes[NIR_MAX_VEC_COMPONENTS];
  
     /**
      * The type of vector that each input takes. Note that negate and
      * absolute value are only allowed on inputs with int or float type and
      * behave differently on the two.
      */
-   nir_alu_type input_types[4];
+   nir_alu_type input_types[NIR_MAX_VEC_COMPONENTS];
  
     nir_op_algebraic_property algebraic_properties;
  } nir_op_info;
@@ -1002,7 +1049,7 @@ typedef struct {
  
  #include "nir_intrinsics.h"
  
-#define NIR_INTRINSIC_MAX_CONST_INDEX 3
+#define NIR_INTRINSIC_MAX_CONST_INDEX 4
  
  /** Represents an intrinsic
   *
@@ -1150,6 +1197,28 @@ typedef enum {
      */
     NIR_INTRINSIC_PARAM_IDX = 12,
  
+   /**
+    * Image dimensionality for image intrinsics
+    *
+    * One of GLSL_SAMPLER_DIM_*
+    */
+   NIR_INTRINSIC_IMAGE_DIM = 13,
+
+   /**
+    * Non-zero if we are accessing an array image
+    */
+   NIR_INTRINSIC_IMAGE_ARRAY = 14,
+
+   /**
+    * Image format for image intrinsics
+    */
+   NIR_INTRINSIC_FORMAT = 15,
+
+   /**
+    * Access qualifiers for image intrinsics
+    */
+   NIR_INTRINSIC_ACCESS = 16,
+
     NIR_INTRINSIC_NUM_INDEX_FLAGS,
  
  } nir_intrinsic_index_flag;
@@ -1218,7 +1287,7 @@ nir_intrinsic_##name(const nir_intrinsic_instr *instr)                        \
  {                                                                             \
     const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];   \
     assert(info->index_map[NIR_INTRINSIC_##flag] > 0);                         \
-   return instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1];      \
+   return (type)instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1]; \
  }                                                                             \
  static inline void                                                            \
  nir_intrinsic_set_##name(nir_intrinsic_instr *instr, type val)                \
@@ -1240,6 +1309,10 @@ INTRINSIC_IDX_ACCESSORS(interp_mode, INTERP_MODE, unsigned)
  INTRINSIC_IDX_ACCESSORS(reduction_op, REDUCTION_OP, unsigned)
  INTRINSIC_IDX_ACCESSORS(cluster_size, CLUSTER_SIZE, unsigned)
  INTRINSIC_IDX_ACCESSORS(param_idx, PARAM_IDX, unsigned)
+INTRINSIC_IDX_ACCESSORS(image_dim, IMAGE_DIM, enum glsl_sampler_dim)
+INTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool)
+INTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier)
+INTRINSIC_IDX_ACCESSORS(format, FORMAT, unsigned)
  
  /**
   * \group texture information
@@ -1702,6 +1775,13 @@ nir_block_last_instr(nir_block *block)
     return exec_node_data(nir_instr, tail, node);
  }
  
+static inline bool
+nir_block_ends_in_jump(nir_block *block)
+{
+   return !exec_list_is_empty(&block->instr_list) &&
+          nir_block_last_instr(block)->type == nir_instr_type_jump;
+}
+
  #define nir_foreach_instr(instr, block) \
     foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
  #define nir_foreach_instr_reverse(instr, block) \
@@ -1743,6 +1823,12 @@ typedef struct {
     /* Unroll the loop regardless of its size */
     bool force_unroll;
  
+   /* Does the loop contain complex loop terminators, continues or other
+    * complex behaviours? If this is true we can't rely on
+    * loop_terminator_list to be complete or accurate.
+    */
+   bool complex_loop;
+
     nir_loop_terminator *limiting_terminator;
  
     /* A list of loop_terminators terminating this loop. */
@@ -2004,10 +2090,27 @@ typedef struct nir_shader_compiler_options {
      */
     bool lower_base_vertex;
  
+   /**
+    * If enabled, gl_HelperInvocation will be lowered as:
+    *
+    *   !((1 << sample_id) & sample_mask_in)
+    *
+    * This depends on hw implementation details which may not be true for
+    * all hw; in particular, that the FS is only executed for covered
+    * samples or for helper invocations.  So, do not blindly enable this
+    * option.
+    *
+    * Note: See also issue #22 in ARB_shader_image_load_store
+    */
+   bool lower_helper_invocation;
+
     bool lower_cs_local_index_from_id;
  
     bool lower_device_index_to_zero;
  
+   /* Set if nir_lower_wpos_ytransform() should also invert gl_PointCoord. */
+   bool lower_wpos_pntc;
+
     /**
      * Should nir_lower_io() create load_interpolated_input intrinsics?
      *
@@ -2016,12 +2119,6 @@ typedef struct nir_shader_compiler_options {
      */
     bool use_interpolated_input_intrinsics;
  
-   /**
-    * Do vertex shader double inputs use two locations? The Vulkan spec
-    * requires two locations to be used, OpenGL allows a single location.
-    */
-   bool vs_inputs_dual_locations;
-
     unsigned max_unroll_iterations;
  } nir_shader_compiler_options;
  
@@ -2256,6 +2353,36 @@ nir_after_block_before_jump(nir_block *block)
     }
  }
  
+static inline nir_cursor
+nir_before_src(nir_src *src, bool is_if_condition)
+{
+   if (is_if_condition) {
+      nir_block *prev_block =
+         nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
+      assert(!nir_block_ends_in_jump(prev_block));
+      return nir_after_block(prev_block);
+   } else if (src->parent_instr->type == nir_instr_type_phi) {
+#ifndef NDEBUG
+      nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr);
+      bool found = false;
+      nir_foreach_phi_src(phi_src, cond_phi) {
+         if (phi_src->src.ssa == src->ssa) {
+            found = true;
+            break;
+         }
+      }
+      assert(found);
+#endif
+      /* The LIST_ENTRY macro is a generic container-of macro; it just happens
+       * to have a more specific name.
+       */
+      nir_phi_src *phi_src = LIST_ENTRY(nir_phi_src, src, src);
+      return nir_after_block_before_jump(phi_src->pred);
+   } else {
+      return nir_before_instr(src->parent_instr);
+   }
+}
+
  static inline nir_cursor
  nir_before_cf_node(nir_cf_node *node)
  {
@@ -2420,7 +2547,7 @@ void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
  void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
                                      nir_instr *after_me);
  
-uint8_t nir_ssa_def_components_read(const nir_ssa_def *def);
+nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def);
  
  /*
   * finds the next basic block in source-code order, returns NULL if there is
@@ -2591,8 +2718,11 @@ void nir_dump_cfg(nir_shader *shader, FILE *fp);
  
  int nir_gs_count_vertices(const nir_shader *shader);
  
+bool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes);
+bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);
  bool nir_split_var_copies(nir_shader *shader);
  bool nir_split_per_member_structs(nir_shader *shader);
+bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);
  
  bool nir_lower_returns_impl(nir_function_impl *impl);
  bool nir_lower_returns(nir_shader *shader);
@@ -2790,7 +2920,8 @@ void nir_lower_two_sided_color(nir_shader *shader);
  bool nir_lower_clamp_color_outputs(nir_shader *shader);
  
  void nir_lower_passthrough_edgeflags(nir_shader *shader);
-void nir_lower_tes_patch_vertices(nir_shader *tes, unsigned patch_vertices);
+bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
+                              const gl_state_index16 *uniform_state_tokens);
  
  typedef struct nir_lower_wpos_ytransform_options {
     gl_state_index16 state_tokens[STATE_LENGTH];
@@ -2899,6 +3030,8 @@ bool nir_opt_dce(nir_shader *shader);
  
  bool nir_opt_dead_cf(nir_shader *shader);
  
+bool nir_opt_find_array_copies(nir_shader *shader);
+
  bool nir_opt_gcm(nir_shader *shader, bool value_number);
  
  bool nir_opt_if(nir_shader *shader);
@@ -2930,6 +3063,10 @@ bool nir_opt_conditional_discard(nir_shader *shader);
  
  void nir_sweep(nir_shader *shader);
  
+void nir_remap_dual_slot_attributes(nir_shader *shader,
+                                    uint64_t *dual_slot_inputs);
+uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot);
+
  nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
  gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);