nir/lower_double_ops: Rework the if (progress) tree

[mesa.git] / src / compiler / nir / nir.h
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h

index 105e09347b5b4948cbd0e2918a599a45325bfe58..d32bbab5dfc8fdd6e91866dcf06176d1abd35ef6 100644 (file)
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -740,6 +740,12 @@ typedef struct nir_ssa_def {
  
     /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
     uint8_t bit_size;
+
+   /**
+    * True if this SSA value may have different values in different SIMD
+    * invocations of the shader.  This is set by nir_divergence_analysis.
+    */
+   bool divergent;
  } nir_ssa_def;
  
  struct nir_src;
@@ -880,6 +886,13 @@ nir_src_is_const(nir_src src)
            src.ssa->parent_instr->type == nir_instr_type_load_const;
  }
  
+static inline bool
+nir_src_is_divergent(nir_src src)
+{
+   assert(src.is_ssa);
+   return src.ssa->divergent;
+}
+
  static inline unsigned
  nir_dest_bit_size(nir_dest dest)
  {
@@ -892,6 +905,13 @@ nir_dest_num_components(nir_dest dest)
     return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components;
  }
  
+static inline bool
+nir_dest_is_divergent(nir_dest dest)
+{
+   assert(dest.is_ssa);
+   return dest.ssa.divergent;
+}
+
  /* Are all components the same, ie. .xxxx */
  static inline bool
  nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
@@ -967,7 +987,7 @@ typedef struct {
   * The values in this enum are carefully chosen so that the sized type is
   * just the unsized type OR the number of bits.
   */
-typedef enum {
+typedef enum PACKED {
     nir_type_invalid = 0, /* Not a valid type */
     nir_type_int =       2,
     nir_type_uint =      4,
@@ -1197,7 +1217,7 @@ typedef enum {
  typedef struct {
     const char *name;
  
-   unsigned num_inputs;
+   uint8_t num_inputs;
  
     /**
      * The number of components in the output
@@ -1216,7 +1236,7 @@ typedef struct {
      * though output_size is zero; in that case, the inputs with a zero
      * size act per-component, while the inputs with non-zero size don't.
      */
-   unsigned output_size;
+   uint8_t output_size;
  
     /**
      * The type of vector that the instruction outputs. Note that the
@@ -1228,7 +1248,7 @@ typedef struct {
     /**
      * The number of components in each input
      */
-   unsigned input_sizes[NIR_MAX_VEC_COMPONENTS];
+   uint8_t input_sizes[NIR_MAX_VEC_COMPONENTS];
  
     /**
      * The type of vector that each input takes. Note that negate and
@@ -1683,7 +1703,7 @@ typedef enum {
  typedef struct {
     const char *name;
  
-   unsigned num_srcs; /** < number of register/SSA inputs */
+   uint8_t num_srcs; /** < number of register/SSA inputs */
  
     /** number of components of each input register
      *
@@ -1692,7 +1712,7 @@ typedef struct {
      * intrinsic consumes however many components are provided and it is not
      * validated at all.
      */
-   int src_components[NIR_INTRINSIC_MAX_INPUTS];
+   int8_t src_components[NIR_INTRINSIC_MAX_INPUTS];
  
     bool has_dest;
  
@@ -1701,16 +1721,16 @@ typedef struct {
      * If this value is 0, the number of components is given by the
      * num_components field of nir_intrinsic_instr.
      */
-   unsigned dest_components;
+   uint8_t dest_components;
  
     /** bitfield of legal bit sizes */
-   unsigned dest_bit_sizes;
+   uint8_t dest_bit_sizes;
  
     /** the number of constant indices used by the intrinsic */
-   unsigned num_indices;
+   uint8_t num_indices;
  
     /** indicates the usage of intr->const_index[n] */
-   unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
+   uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
  
     /** semantic flags for calls to this intrinsic */
     nir_intrinsic_semantic_flag flags;
@@ -1719,7 +1739,7 @@ typedef struct {
  extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
  
  static inline unsigned
-nir_intrinsic_src_components(nir_intrinsic_instr *intr, unsigned srcn)
+nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
  {
     const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
     assert(srcn < info->num_srcs);
@@ -1743,6 +1763,33 @@ nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
        return intr->num_components;
  }
  
+/**
+ * Helper to copy const_index[] from src to dst, without assuming they
+ * match in order.
+ */
+static inline void
+nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
+{
+   if (src->intrinsic == dst->intrinsic) {
+      memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
+      return;
+   }
+
+   const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
+   const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];
+
+   for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
+      if (src_info->index_map[i] == 0)
+         continue;
+
+      /* require that dst instruction also uses the same const_index[]: */
+      assert(dst_info->index_map[i] > 0);
+
+      dst->const_index[dst_info->index_map[i] - 1] =
+            src->const_index[src_info->index_map[i] - 1];
+   }
+}
+
  #define INTRINSIC_IDX_ACCESSORS(name, flag, type)                             \
  static inline type                                                            \
  nir_intrinsic_##name(const nir_intrinsic_instr *instr)                        \
@@ -1949,6 +1996,30 @@ typedef struct {
     unsigned sampler_index;
  } nir_tex_instr;
  
+/*
+ * Returns true if the texture operation requires a sampler as a general rule;
+ * see the documentation of sampler_index.
+ *
+ * Note that the specific hw/driver backend could still require a sampler
+ * object/configuration packet in any case, for some other reason.
+ */
+static inline bool
+nir_tex_instr_need_sampler(const nir_tex_instr *instr)
+{
+   switch (instr->op) {
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+   case nir_texop_txs:
+   case nir_texop_lod:
+   case nir_texop_query_levels:
+   case nir_texop_texture_samples:
+   case nir_texop_samples_identical:
+      return false;
+   default:
+      return true;
+   }
+}
+
  static inline unsigned
  nir_tex_instr_dest_size(const nir_tex_instr *instr)
  {
@@ -2152,8 +2223,26 @@ typedef struct {
  } nir_load_const_instr;
  
  typedef enum {
+   /** Return from a function
+    *
+    * This instruction is a classic function return.  It jumps to
+    * nir_function_impl::end_block.  No return value is provided in this
+    * instruction.  Instead, the function is expected to write any return
+    * data to a deref passed in from the caller.
+    */
     nir_jump_return,
+
+   /** Break out of the inner-most loop
+    *
+    * This has the same semantics as C's "break" statement.
+    */
     nir_jump_break,
+
+   /** Jump back to the top of the inner-most loop
+    *
+    * This has the same semantics as C's "continue" statement assuming that a
+    * NIR loop is implemented as "while (1) { body }".
+    */
     nir_jump_continue,
  } nir_jump_type;
  
@@ -2545,10 +2634,61 @@ typedef struct {
   */
  typedef enum {
     nir_metadata_none = 0x0,
+
+   /** Indicates that nir_block::index values are valid.
+    *
+    * The start block has index 0 and they increase through a natural walk of
+    * the CFG.  nir_function_impl::num_blocks is the number of blocks and
+    * every block index is in the range [0, nir_function_impl::num_blocks].
+    *
+    * A pass can preserve this metadata type if it doesn't touch the CFG.
+    */
     nir_metadata_block_index = 0x1,
+
+   /** Indicates that block dominance information is valid
+    *
+    * This includes:
+    *
+    *   - nir_block::num_dom_children
+    *   - nir_block::dom_children
+    *   - nir_block::dom_frontier
+    *   - nir_block::dom_pre_index
+    *   - nir_block::dom_post_index
+    *
+    * A pass can preserve this metadata type if it doesn't touch the CFG.
+    */
     nir_metadata_dominance = 0x2,
+
+   /** Indicates that SSA def data-flow liveness information is valid
+    *
+    * This includes:
+    *
+    *   - nir_ssa_def::live_index
+    *   - nir_block::live_in
+    *   - nir_block::live_out
+    *
+    * A pass can preserve this metadata type if it never adds or removes any
+    * SSA defs (most passes shouldn't preserve this metadata type).
+    */
     nir_metadata_live_ssa_defs = 0x4,
+
+   /** A dummy metadata value to track when a pass forgot to call
+    * nir_metadata_preserve.
+    *
+    * A pass should always clear this value even if it doesn't make any
+    * progress to indicate that it thought about preserving metadata.
+    */
     nir_metadata_not_properly_reset = 0x8,
+
+   /** Indicates that loop analysis information is valid.
+    *
+    * This includes everything pointed to by nir_loop::info.
+    *
+    * A pass can preserve this metadata type if it is guaranteed to not affect
+    * any loop metadata.  However, since loop metadata includes things like
+    * loop counts which depend on arithmetic in the loop, this is very hard to
+    * determine.  Most passes shouldn't preserve this metadata type.
+    */
     nir_metadata_loop_analysis = 0x10,
  } nir_metadata;
  
@@ -2839,18 +2979,18 @@ typedef struct nir_shader_compiler_options {
     bool lower_ldexp;
  
     bool lower_pack_half_2x16;
-   bool lower_pack_half_2x16_split;
     bool lower_pack_unorm_2x16;
     bool lower_pack_snorm_2x16;
     bool lower_pack_unorm_4x8;
     bool lower_pack_snorm_4x8;
     bool lower_unpack_half_2x16;
-   bool lower_unpack_half_2x16_split;
     bool lower_unpack_unorm_2x16;
     bool lower_unpack_snorm_2x16;
     bool lower_unpack_unorm_4x8;
     bool lower_unpack_snorm_4x8;
  
+   bool lower_pack_split;
+
     bool lower_extract_byte;
     bool lower_extract_word;
  
@@ -2972,6 +3112,14 @@ typedef struct nir_shader_compiler_options {
      */
     bool has_imul24;
  
+   /** Backend supports umul24; if not set, umul24 will automatically be lowered
+    * to imul with masked inputs */
+   bool has_umul24;
+
+   /** Backend supports umad24; if not set, umad24 will automatically be lowered
+    * to imul with masked inputs and iadd */
+   bool has_umad24;
+
     /* Whether to generate only scoped_memory_barrier intrinsics instead of the
      * set of memory barrier intrinsics based on GLSL.
      */
@@ -2987,6 +3135,15 @@ typedef struct nir_shader_compiler_options {
      */
     bool intel_vec4;
  
+   /** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */
+   bool lower_bfe_with_two_constants;
+
+   /** Whether 8-bit ALU is supported. */
+   bool support_8bit_alu;
+
+   /** Whether 16-bit ALU is supported. */
+   bool support_16bit_alu;
+
     unsigned max_unroll_iterations;
  
     nir_lower_int64_options lower_int64_options;
@@ -3397,6 +3554,9 @@ bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
                           void *state);
  bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
  bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
+bool nir_foreach_phi_src_leaving_block(nir_block *instr,
+                                       nir_foreach_src_cb cb,
+                                       void *state);
  
  nir_const_value *nir_src_as_const_value(nir_src src);
  
@@ -3771,6 +3931,15 @@ void nir_assign_io_var_locations(struct exec_list *var_list,
                                   unsigned *size,
                                   gl_shader_stage stage);
  
+typedef struct {
+   uint8_t num_linked_io_vars;
+   uint8_t num_linked_patch_io_vars;
+} nir_linked_io_var_info;
+
+nir_linked_io_var_info
+nir_assign_linked_io_var_locations(nir_shader *producer,
+                                   nir_shader *consumer);
+
  typedef enum {
     /* If set, this causes all 64-bit IO operations to be lowered on-the-fly
      * to 32-bit operations.  This is only valid for nir_var_shader_in/out
@@ -4190,6 +4359,8 @@ bool nir_lower_wpos_ytransform(nir_shader *shader,
                                 const nir_lower_wpos_ytransform_options *options);
  bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading);
  
+bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
+
  bool nir_lower_fb_read(nir_shader *shader);
  
  typedef struct nir_lower_drawpixels_options {
@@ -4240,6 +4411,8 @@ bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
                         nir_lower_doubles_options options);
  bool nir_lower_pack(nir_shader *shader);
  
+void nir_lower_mediump_outputs(nir_shader *nir);
+
  bool nir_lower_point_size(nir_shader *shader, float min, float max);
  
  typedef enum {
@@ -4269,7 +4442,7 @@ bool nir_repair_ssa(nir_shader *shader);
  
  void nir_convert_loop_to_lcssa(nir_loop *loop);
  bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
-bool* nir_divergence_analysis(nir_shader *shader, nir_divergence_options options);
+void nir_divergence_analysis(nir_shader *shader, nir_divergence_options options);
  
  /* If phi_webs_only is true, only convert SSA values involved in phi nodes to
   * registers.  If false, convert all values (even those not involved in a phi
@@ -4377,7 +4550,8 @@ typedef bool (*nir_should_vectorize_mem_func)(unsigned align, unsigned bit_size,
                                                nir_intrinsic_instr *low, nir_intrinsic_instr *high);
  
  bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes,
-                                  nir_should_vectorize_mem_func callback);
+                                  nir_should_vectorize_mem_func callback,
+                                  nir_variable_mode robust_modes);
  
  void nir_schedule(nir_shader *shader, int threshold);