i965/fs: Add support for nir_intrinsic_shuffle

[mesa.git] / src / intel / compiler / brw_nir.c
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c

index 5ed36fe1bf7f2546948c25a01e6371aba6793cf2..dbad4a14b173026efae3256c8756f3b18e4edf19 100644 (file)
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -211,7 +211,6 @@ remap_patch_urb_offsets(nir_block *block, nir_builder *b,
  
  void
  brw_nir_lower_vs_inputs(nir_shader *nir,
-                        bool use_legacy_snorm_formula,
                          const uint8_t *vs_attrib_wa_flags)
  {
     /* Start with the location of the variable's base. */
@@ -230,8 +229,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
  
     add_const_offset_to_base(nir, nir_var_shader_in);
  
-   brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula,
-                                       vs_attrib_wa_flags);
+   brw_nir_apply_attribute_workarounds(nir, vs_attrib_wa_flags);
  
     /* The last step is to remap VERT_ATTRIB_* to actual registers */
  
@@ -505,14 +503,6 @@ brw_nir_lower_fs_outputs(nir_shader *nir)
     nir_lower_io(nir, nir_var_shader_out, type_size_dvec4, 0);
  }
  
-void
-brw_nir_lower_cs_shared(nir_shader *nir)
-{
-   nir_assign_var_locations(&nir->shared, &nir->num_shared,
-                            type_size_scalar_bytes);
-   nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes, 0);
-}
-
  #define OPT(pass, ...) ({                                  \
     bool this_progress = false;                             \
     NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
@@ -521,18 +511,29 @@ brw_nir_lower_cs_shared(nir_shader *nir)
     this_progress;                                          \
  })
  
-nir_shader *
-brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
-                 bool is_scalar)
+static nir_variable_mode
+brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
+                         gl_shader_stage stage)
  {
     nir_variable_mode indirect_mask = 0;
-   if (compiler->glsl_compiler_options[nir->info.stage].EmitNoIndirectInput)
+
+   if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
        indirect_mask |= nir_var_shader_in;
-   if (compiler->glsl_compiler_options[nir->info.stage].EmitNoIndirectOutput)
+   if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput)
        indirect_mask |= nir_var_shader_out;
-   if (compiler->glsl_compiler_options[nir->info.stage].EmitNoIndirectTemp)
+   if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
        indirect_mask |= nir_var_local;
  
+   return indirect_mask;
+}
+
+nir_shader *
+brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
+                 bool is_scalar)
+{
+   nir_variable_mode indirect_mask =
+      brw_nir_no_indirect_mask(compiler, nir->info.stage);
+
     bool progress;
     do {
        progress = false;
@@ -625,6 +626,18 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
  
     OPT(nir_split_var_copies);
  
+   /* Run opt_algebraic before int64 lowering so we can hopefully get rid
+    * of some int64 instructions.
+    */
+   OPT(nir_opt_algebraic);
+
+   /* Lower int64 instructions before nir_optimize so that loop unrolling
+    * sees their actual cost.
+    */
+   nir_lower_int64(nir, nir_lower_imul64 |
+                        nir_lower_isign64 |
+                        nir_lower_divmod64);
+
     nir = brw_nir_optimize(nir, compiler, is_scalar);
  
     if (is_scalar) {
@@ -643,25 +656,16 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
        .lower_to_scalar = true,
        .lower_subgroup_masks = true,
        .lower_vote_trivial = !is_scalar,
+      .lower_shuffle = true,
     };
     OPT(nir_lower_subgroups, &subgroups_options);
  
     OPT(nir_lower_clip_cull_distance_arrays);
  
-   nir_variable_mode indirect_mask = 0;
-   if (compiler->glsl_compiler_options[nir->info.stage].EmitNoIndirectInput)
-      indirect_mask |= nir_var_shader_in;
-   if (compiler->glsl_compiler_options[nir->info.stage].EmitNoIndirectOutput)
-      indirect_mask |= nir_var_shader_out;
-   if (compiler->glsl_compiler_options[nir->info.stage].EmitNoIndirectTemp)
-      indirect_mask |= nir_var_local;
-
+   nir_variable_mode indirect_mask =
+      brw_nir_no_indirect_mask(compiler, nir->info.stage);
     nir_lower_indirect_derefs(nir, indirect_mask);
  
-   nir_lower_int64(nir, nir_lower_imul64 |
-                        nir_lower_isign64 |
-                        nir_lower_divmod64);
-
     /* Get rid of split copies */
     nir = brw_nir_optimize(nir, compiler, is_scalar);
  
@@ -670,6 +674,36 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
     return nir;
  }
  
+/* Remove dead/unused varyings between *producer and *consumer.  When
+ * nir_remove_unused_varyings() returns true, re-lower and re-optimize both. */
+void
+brw_nir_link_shaders(const struct brw_compiler *compiler,
+                     nir_shader **producer, nir_shader **consumer)
+{
+   NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
+   NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);
+
+   if (nir_remove_unused_varyings(*producer, *consumer)) {
+      NIR_PASS_V(*producer, nir_lower_global_vars_to_local);
+      NIR_PASS_V(*consumer, nir_lower_global_vars_to_local);
+
+      /* The backend might not be able to handle indirects on
+       * temporaries so we need to lower indirects on any of the
+       * varyings we have demoted here.
+       */
+      NIR_PASS_V(*producer, nir_lower_indirect_derefs,
+                 brw_nir_no_indirect_mask(compiler, (*producer)->info.stage));
+      NIR_PASS_V(*consumer, nir_lower_indirect_derefs,
+                 brw_nir_no_indirect_mask(compiler, (*consumer)->info.stage));
+
+      const bool p_is_scalar = compiler->scalar_stage[(*producer)->info.stage];
+      *producer = brw_nir_optimize(*producer, compiler, p_is_scalar);
+
+      const bool c_is_scalar = compiler->scalar_stage[(*consumer)->info.stage];
+      *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar);
+   }
+}
+
  /* Prepare the given shader for codegen
   *
   * This function is intended to be called right before going into the actual
@@ -808,12 +842,18 @@ brw_type_for_nir_type(const struct gen_device_info *devinfo, nir_alu_type type)
     case nir_type_float:
     case nir_type_float32:
        return BRW_REGISTER_TYPE_F;
+   case nir_type_float16:
+      return BRW_REGISTER_TYPE_HF;
     case nir_type_float64:
        return BRW_REGISTER_TYPE_DF;
     case nir_type_int64:
        return devinfo->gen < 8 ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_Q;
     case nir_type_uint64:
        return devinfo->gen < 8 ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_UQ;
+   case nir_type_int16:
+      return BRW_REGISTER_TYPE_W;
+   case nir_type_uint16:
+      return BRW_REGISTER_TYPE_UW;
     default:
        unreachable("unknown type");
     }
@@ -832,6 +872,9 @@ brw_glsl_base_type_for_nir_type(nir_alu_type type)
     case nir_type_float32:
        return GLSL_TYPE_FLOAT;
  
+   case nir_type_float16:
+      return GLSL_TYPE_FLOAT16;
+
     case nir_type_float64:
        return GLSL_TYPE_DOUBLE;
  
@@ -843,6 +886,12 @@ brw_glsl_base_type_for_nir_type(nir_alu_type type)
     case nir_type_uint32:
        return GLSL_TYPE_UINT;
  
+   case nir_type_int16:
+      return GLSL_TYPE_INT16;
+
+   case nir_type_uint16:
+      return GLSL_TYPE_UINT16;
+
     default:
        unreachable("bad type");
     }