From: Rhys Perry <pendingchaos02@gmail.com>
Date: Fri, 17 May 2019 10:53:32 +0000 (+0100)
Subject: nir/lower_io_to_vector: allow FS outputs to be vectorized
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=300e758b7c428289909b318bb0df230b22ab5d9b;p=mesa.git

nir/lower_io_to_vector: allow FS outputs to be vectorized

v2: handle dual-source blending
v3: use a higher MAX_SLOTS

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---

diff --git a/src/compiler/nir/nir_lower_io_to_vector.c b/src/compiler/nir/nir_lower_io_to_vector.c
index abc89fb2f49..896b9504868 100644
--- a/src/compiler/nir/nir_lower_io_to_vector.c
+++ b/src/compiler/nir/nir_lower_io_to_vector.c
@@ -34,6 +34,19 @@
  * when all is said and done.
  */
 
+/* FRAG_RESULT_MAX+1 instead of just FRAG_RESULT_MAX because get_slot() adds
+ * data.index to the location in order to handle dual source blending */
+#define MAX_SLOTS MAX2(VARYING_SLOT_TESS_MAX, FRAG_RESULT_MAX+1)
+
+static unsigned
+get_slot(const nir_variable *var)
+{
+   /* Adding data.index folds dual-source blending into the slot number. This
+    * might not be correct when more than one render target is supported, but
+    * it seems no driver supports more than one. */
+   return var->data.location + var->data.index;
+}
+
 static const struct glsl_type *
 resize_array_vec_type(const struct glsl_type *type, unsigned num_components)
 {
@@ -50,10 +63,6 @@ resize_array_vec_type(const struct glsl_type *type, unsigned num_components)
 static bool
 variable_can_rewrite(const nir_variable *var)
 {
-   /* Only touch user defined varyings as these are the only ones we split */
-   if (var->data.location < VARYING_SLOT_VAR0)
-      return false;
-
    /* Skip complex types we don't split in the first place */
    if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type)))
       return false;
@@ -97,22 +106,26 @@ variables_can_merge(nir_shader *shader,
        a->data.interpolation != b->data.interpolation)
       return false;
 
+   if (shader->info.stage == MESA_SHADER_FRAGMENT &&
+       a->data.mode == nir_var_shader_out &&
+       a->data.index != b->data.index)
+      return false;
+
    return true;
 }
 
 static bool
 create_new_io_vars(nir_shader *shader, struct exec_list *io_list,
-                   nir_variable *old_vars[MAX_VARYINGS_INCL_PATCH][4],
-                   nir_variable *new_vars[MAX_VARYINGS_INCL_PATCH][4])
+                   nir_variable *old_vars[MAX_SLOTS][4],
+                   nir_variable *new_vars[MAX_SLOTS][4])
 {
    if (exec_list_is_empty(io_list))
       return false;
 
    nir_foreach_variable(var, io_list) {
       if (variable_can_rewrite(var)) {
-         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
          unsigned frac = var->data.location_frac;
-         old_vars[loc][frac] = var;
+         old_vars[get_slot(var)][frac] = var;
       }
    }
 
@@ -121,7 +134,7 @@ create_new_io_vars(nir_shader *shader, struct exec_list *io_list,
    /* We don't handle combining vars of different type e.g. different array
     * lengths.
     */
-   for (unsigned loc = 0; loc < MAX_VARYINGS_INCL_PATCH; loc++) {
+   for (unsigned loc = 0; loc < MAX_SLOTS; loc++) {
       unsigned frac = 0;
       while (frac < 4) {
          nir_variable *first_var = old_vars[loc][frac];
@@ -197,10 +210,10 @@ nir_lower_io_to_vector_impl(nir_function_impl *impl, nir_variable_mode modes)
    nir_metadata_require(impl, nir_metadata_dominance);
 
    nir_shader *shader = impl->function->shader;
-   nir_variable *old_inputs[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
-   nir_variable *new_inputs[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
-   nir_variable *old_outputs[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
-   nir_variable *new_outputs[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
+   nir_variable *old_inputs[MAX_SLOTS][4] = {{0}};
+   nir_variable *new_inputs[MAX_SLOTS][4] = {{0}};
+   nir_variable *old_outputs[MAX_SLOTS][4] = {{0}};
+   nir_variable *new_outputs[MAX_SLOTS][4] = {{0}};
 
    if (modes & nir_var_shader_in) {
       /* Vertex shaders support overlapping inputs.  We don't do those */
@@ -215,11 +228,6 @@ nir_lower_io_to_vector_impl(nir_function_impl *impl, nir_variable_mode modes)
    }
 
    if (modes & nir_var_shader_out) {
-      /* Fragment shader outputs are always vec4.  You shouldn't have
-       * scalarized them and it doesn't make sense to vectorize them.
-       */
-      assert(b.shader->info.stage != MESA_SHADER_FRAGMENT);
-
       /* If we don't actually merge any variables, remove that bit from modes
        * so we don't bother doing extra non-work.
        */
@@ -256,13 +264,12 @@ nir_lower_io_to_vector_impl(nir_function_impl *impl, nir_variable_mode modes)
                break;
 
             if (old_deref->mode == nir_var_shader_out)
-               assert(b.shader->info.stage == MESA_SHADER_TESS_CTRL);
+               assert(b.shader->info.stage == MESA_SHADER_TESS_CTRL ||
+                      b.shader->info.stage == MESA_SHADER_FRAGMENT);
 
             nir_variable *old_var = nir_deref_instr_get_variable(old_deref);
-            if (old_var->data.location < VARYING_SLOT_VAR0)
-               break;
 
-            const unsigned loc = old_var->data.location - VARYING_SLOT_VAR0;
+            const unsigned loc = get_slot(old_var);
             const unsigned old_frac = old_var->data.location_frac;
             nir_variable *new_var = old_deref->mode == nir_var_shader_in ?
                                     new_inputs[loc][old_frac] :
@@ -270,7 +277,7 @@ nir_lower_io_to_vector_impl(nir_function_impl *impl, nir_variable_mode modes)
             if (!new_var)
                break;
 
-            assert(new_var->data.location == VARYING_SLOT_VAR0 + loc);
+            assert(get_slot(new_var) == loc);
             const unsigned new_frac = new_var->data.location_frac;
 
             nir_component_mask_t vec4_comp_mask =
@@ -309,16 +316,14 @@ nir_lower_io_to_vector_impl(nir_function_impl *impl, nir_variable_mode modes)
                break;
 
             nir_variable *old_var = nir_deref_instr_get_variable(old_deref);
-            if (old_var->data.location < VARYING_SLOT_VAR0)
-               break;
 
-            const unsigned loc = old_var->data.location - VARYING_SLOT_VAR0;
+            const unsigned loc = get_slot(old_var);
             const unsigned old_frac = old_var->data.location_frac;
             nir_variable *new_var = new_outputs[loc][old_frac];
             if (!new_var)
                break;
 
-            assert(new_var->data.location == VARYING_SLOT_VAR0 + loc);
+            assert(get_slot(new_var) == loc);
             const unsigned new_frac = new_var->data.location_frac;
 
             b.cursor = nir_before_instr(&intrin->instr);
diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h
index 1a0ec5fb12a..0f402f42741 100644
--- a/src/compiler/shader_enums.h
+++ b/src/compiler/shader_enums.h
@@ -27,6 +27,7 @@
 #define SHADER_ENUMS_H
 
 #include <stdbool.h>
+#include "mesa/main/config.h"
 
 #ifdef __cplusplus
 extern "C" {