nir: fix a bug in is_dual_slot in nir_io_add_const_offset_to_base
[mesa.git] / src / compiler / nir / nir_lower_io_arrays_to_elements.c
index cdf9a76a881edf6bb8e36495d9a89af0c7d1b794..e49abefc0d46d7322a3b72b4e14a36c88b1e8f31 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "nir.h"
 #include "nir_builder.h"
+#include "nir_deref.h"
 
 /** @file nir_lower_io_arrays_to_elements.c
  *
  */
 
 static unsigned
-get_io_offset(nir_builder *b, nir_deref_var *deref, nir_variable *var,
-              unsigned *element_index)
+get_io_offset(nir_builder *b, nir_deref_instr *deref, nir_variable *var,
+              unsigned *element_index, unsigned *xfb_offset,
+              nir_ssa_def **vertex_index)
 {
-   bool vs_in = (b->shader->info.stage == MESA_SHADER_VERTEX) &&
-                (var->data.mode == nir_var_shader_in);
+   nir_deref_path path;
+   nir_deref_path_init(&path, deref, NULL);
 
-   nir_deref *tail = &deref->deref;
+   assert(path.path[0]->deref_type == nir_deref_type_var);
+   nir_deref_instr **p = &path.path[1];
 
    /* For per-vertex input arrays (i.e. geometry shader inputs), skip the
     * outermost array index.  Process the rest normally.
     */
    if (nir_is_per_vertex_io(var, b->shader->info.stage)) {
-      tail = tail->child;
+      *vertex_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
+      p++;
    }
 
    unsigned offset = 0;
-   while (tail->child != NULL) {
-      tail = tail->child;
+   *xfb_offset = 0;
+   for (; *p; p++) {
+      if ((*p)->deref_type == nir_deref_type_array) {
+         /* must not be an indirect dereference; indirects are skipped by the caller */
+         unsigned index = nir_src_as_uint((*p)->arr.index);
 
-      if (tail->deref_type == nir_deref_type_array) {
-         nir_deref_array *deref_array = nir_deref_as_array(tail);
-         assert(deref_array->deref_array_type != nir_deref_array_type_indirect);
+         unsigned size = glsl_count_attribute_slots((*p)->type, false);
+         offset += size * index;
 
-         unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
-         offset += size * deref_array->base_offset;
+         *xfb_offset += index * glsl_get_component_slots((*p)->type) * 4;
 
-         unsigned num_elements = glsl_type_is_array(tail->type) ?
-            glsl_get_aoa_size(tail->type) : 1;
+         unsigned num_elements = glsl_type_is_array((*p)->type) ?
+            glsl_get_aoa_size((*p)->type) : 1;
 
-         num_elements *= glsl_type_is_matrix(glsl_without_array(tail->type)) ?
-            glsl_get_matrix_columns(glsl_without_array(tail->type)) : 1;
+         num_elements *= glsl_type_is_matrix(glsl_without_array((*p)->type)) ?
+            glsl_get_matrix_columns(glsl_without_array((*p)->type)) : 1;
 
-         *element_index += num_elements * deref_array->base_offset;
-      } else if (tail->deref_type == nir_deref_type_struct) {
+         *element_index += num_elements * index;
+      } else if ((*p)->deref_type == nir_deref_type_struct) {
          /* TODO: we could also add struct splitting support to this pass */
          break;
       }
    }
 
+   nir_deref_path_finish(&path);
+
    return offset;
 }
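
Editorial note: the hunk above replaces the old nir_deref child-chain walk with nir_deref_path, which flattens an access chain into a NULL-terminated array of deref instructions starting at the variable. A minimal sketch of that traversal pattern, assuming only constant array indices (the pass filters out indirect accesses before get_io_offset runs); the helper name is illustrative, not part of the patch:

    #include "nir.h"
    #include "nir_deref.h"

    /* Sum the attribute-slot offset implied by constant array indices
     * along a deref chain, using the deref-path walk shown above. */
    static unsigned
    sum_const_slot_offset(nir_deref_instr *deref)
    {
       nir_deref_path path;
       nir_deref_path_init(&path, deref, NULL);

       assert(path.path[0]->deref_type == nir_deref_type_var);

       unsigned offset = 0;
       for (nir_deref_instr **p = &path.path[1]; *p; p++) {
          if ((*p)->deref_type == nir_deref_type_array &&
              nir_src_is_const((*p)->arr.index)) {
             /* Each constant index advances by the slot size of the element. */
             offset += glsl_count_attribute_slots((*p)->type, false) *
                       nir_src_as_uint((*p)->arr.index);
          }
       }

       nir_deref_path_finish(&path);
       return offset;
    }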
 
@@ -102,27 +109,6 @@ get_array_elements(struct hash_table *ht, nir_variable *var,
    return elements;
 }
 
-static void
-create_array_deref(nir_intrinsic_instr *arr_intr,
-                   nir_intrinsic_instr *element_intr)
-{
-   assert(arr_intr->variables[0]->deref.child);
-
-   nir_deref *parent = &element_intr->variables[0]->deref;
-   nir_deref_array *darr =
-            nir_deref_as_array(arr_intr->variables[0]->deref.child);
-   nir_deref_array *ndarr = nir_deref_array_create(parent);
-
-   ndarr->deref.type = glsl_get_array_element(parent->type);
-   ndarr->deref_array_type = darr->deref_array_type;
-   ndarr->base_offset = darr->base_offset;
-
-   if (ndarr->deref_array_type == nir_deref_array_type_indirect)
-      nir_src_copy(&ndarr->indirect, &darr->indirect, parent);
-
-   element_intr->variables[0]->deref.child = &ndarr->deref;
-}
-
 static void
 lower_array(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
             struct hash_table *varyings)
@@ -132,26 +118,30 @@ lower_array(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
    nir_variable **elements =
       get_array_elements(varyings, var, b->shader->info.stage);
 
+   nir_ssa_def *vertex_index = NULL;
    unsigned elements_index = 0;
-   unsigned io_offset = get_io_offset(b, intr->variables[0], var,
-                                      &elements_index);
+   unsigned xfb_offset = 0;
+   unsigned io_offset = get_io_offset(b, nir_src_as_deref(intr->src[0]),
+                                      var, &elements_index, &xfb_offset,
+                                      &vertex_index);
 
    nir_variable *element = elements[elements_index];
    if (!element) {
          element = nir_variable_clone(var, b->shader);
          element->data.location =  var->data.location + io_offset;
 
+         if (var->data.explicit_offset)
+            element->data.offset = var->data.offset + xfb_offset;
+
          const struct glsl_type *type = glsl_without_array(element->type);
 
          /* This pass also splits matrices so we need to give them a new type. */
-         if (glsl_type_is_matrix(type)) {
-            type = glsl_vector_type(glsl_get_base_type(type),
-                                    glsl_get_vector_elements(type));
-         }
+         if (glsl_type_is_matrix(type))
+            type = glsl_get_column_type(type);
 
          if (nir_is_per_vertex_io(var, b->shader->info.stage)) {
-            type = glsl_get_array_instance(type,
-                                           glsl_get_length(element->type));
+            type = glsl_array_type(type, glsl_get_length(element->type),
+                                   glsl_get_explicit_stride(element->type));
          }
 
          element->type = type;
@@ -160,18 +150,26 @@ lower_array(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
          nir_shader_add_variable(b->shader, element);
    }
 
+   nir_deref_instr *element_deref = nir_build_deref_var(b, element);
+
+   if (nir_is_per_vertex_io(var, b->shader->info.stage)) {
+      assert(vertex_index);
+      element_deref = nir_build_deref_array(b, element_deref, vertex_index);
+   }
+
    nir_intrinsic_instr *element_intr =
       nir_intrinsic_instr_create(b->shader, intr->intrinsic);
    element_intr->num_components = intr->num_components;
-   element_intr->variables[0] = nir_deref_var_create(element_intr, element);
+   element_intr->src[0] = nir_src_for_ssa(&element_deref->dest.ssa);
 
-   if (intr->intrinsic != nir_intrinsic_store_var) {
+   if (intr->intrinsic != nir_intrinsic_store_deref) {
       nir_ssa_dest_init(&element_intr->instr, &element_intr->dest,
                         intr->num_components, intr->dest.ssa.bit_size, NULL);
 
-      if (intr->intrinsic == nir_intrinsic_interp_var_at_offset ||
-          intr->intrinsic == nir_intrinsic_interp_var_at_sample) {
-         nir_src_copy(&element_intr->src[0], &intr->src[0],
+      if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset ||
+          intr->intrinsic == nir_intrinsic_interp_deref_at_sample ||
+          intr->intrinsic == nir_intrinsic_interp_deref_at_vertex) {
+         nir_src_copy(&element_intr->src[1], &intr->src[1],
                       &element_intr->instr);
       }
 
@@ -180,14 +178,10 @@ lower_array(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
    } else {
       nir_intrinsic_set_write_mask(element_intr,
                                    nir_intrinsic_write_mask(intr));
-      nir_src_copy(&element_intr->src[0], &intr->src[0],
+      nir_src_copy(&element_intr->src[1], &intr->src[1],
                    &element_intr->instr);
    }
 
-   if (nir_is_per_vertex_io(var, b->shader->info.stage)) {
-      create_array_deref(intr, element_intr);
-   }
-
    nir_builder_instr_insert(b, &element_intr->instr);
 
    /* Remove the old load intrinsic */
@@ -195,20 +189,20 @@ lower_array(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
 }
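
Editorial note: with deref instructions, the replacement access chain is built directly with nir_builder, which is why the create_array_deref() helper above is removed. A small illustrative sketch of that construction, mirroring the per-vertex handling in lower_array(); the function name is hypothetical:

    #include "nir.h"
    #include "nir_builder.h"

    /* Build the deref chain for a split-out element variable.  Per-vertex
     * I/O keeps its outer array, so the vertex index is re-applied. */
    static nir_deref_instr *
    build_element_deref(nir_builder *b, nir_variable *element,
                        nir_ssa_def *vertex_index)
    {
       nir_deref_instr *deref = nir_build_deref_var(b, element);

       if (vertex_index)
          deref = nir_build_deref_array(b, deref, vertex_index);

       return deref;
    }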
 
 static bool
-deref_has_indirect(nir_builder *b, nir_variable *var, nir_deref_var *deref)
+deref_has_indirect(nir_builder *b, nir_variable *var, nir_deref_path *path)
 {
-   nir_deref *tail = &deref->deref;
+   assert(path->path[0]->deref_type == nir_deref_type_var);
+   nir_deref_instr **p = &path->path[1];
 
    if (nir_is_per_vertex_io(var, b->shader->info.stage)) {
-      tail = tail->child;
+      p++;
    }
 
-   for (tail = tail->child; tail; tail = tail->child) {
-      if (tail->deref_type != nir_deref_type_array)
+   for (; *p; p++) {
+      if ((*p)->deref_type != nir_deref_type_array)
          continue;
 
-      nir_deref_array *arr = nir_deref_as_array(tail);
-      if (arr->deref_array_type == nir_deref_array_type_indirect)
+      if (!nir_src_is_const((*p)->arr.index))
          return true;
    }
 
@@ -219,8 +213,8 @@ deref_has_indirect(nir_builder *b, nir_variable *var, nir_deref_var *deref)
  * indirect indexing.
  */
 static void
-create_indirects_mask(nir_shader *shader, uint64_t *indirects,
-                      uint64_t *patch_indirects, nir_variable_mode mode)
+create_indirects_mask(nir_shader *shader,
+                      BITSET_WORD *indirects, nir_variable_mode mode)
 {
    nir_foreach_function(function, shader) {
       if (function->impl) {
@@ -235,26 +229,28 @@ create_indirects_mask(nir_shader *shader, uint64_t *indirects,
 
                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 
-               if (intr->intrinsic != nir_intrinsic_load_var &&
-                   intr->intrinsic != nir_intrinsic_store_var &&
-                   intr->intrinsic != nir_intrinsic_interp_var_at_centroid &&
-                   intr->intrinsic != nir_intrinsic_interp_var_at_sample &&
-                   intr->intrinsic != nir_intrinsic_interp_var_at_offset)
+               if (intr->intrinsic != nir_intrinsic_load_deref &&
+                   intr->intrinsic != nir_intrinsic_store_deref &&
+                   intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
+                   intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
+                   intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
+                   intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
                   continue;
 
-               nir_variable *var = intr->variables[0]->var;
-
-               if (var->data.mode != mode)
+               nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+               if (deref->mode != mode)
                   continue;
 
-               uint64_t loc_mask = ((uint64_t)1) << var->data.location;
-               if (var->data.patch) {
-                  if (deref_has_indirect(&b, var, intr->variables[0]))
-                     patch_indirects[var->data.location_frac] |= loc_mask;
-               } else {
-                  if (deref_has_indirect(&b, var, intr->variables[0]))
-                     indirects[var->data.location_frac] |= loc_mask;
-               }
+               nir_variable *var = nir_deref_instr_get_variable(deref);
+
+               nir_deref_path path;
+               nir_deref_path_init(&path, deref, NULL);
+
+               int loc = var->data.location * 4 + var->data.location_frac;
+               if (deref_has_indirect(&b, var, &path))
+                  BITSET_SET(indirects, loc);
+
+               nir_deref_path_finish(&path);
             }
          }
       }
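
Editorial note: the indirect-access bookkeeping changes here from four uint64_t masks keyed by component (plus separate patch masks) to a single bitset indexed by location * 4 + component, so patch and non-patch varyings share one table. A self-contained sketch of that indexing, assuming Mesa's util/bitset.h macros; the function is illustrative only:

    #include "nir.h"
    #include "util/bitset.h"

    /* One bit per (location, component) pair; replaces the old
     * uint64_t indirects[4] / patch_indirects[4] arrays. */
    static bool
    example_indirect_tracking(const nir_variable *var, bool is_indirect)
    {
       BITSET_DECLARE(indirects, 4 * VARYING_SLOT_TESS_MAX) = {0};

       unsigned loc = var->data.location * 4 + var->data.location_frac;
       if (is_indirect)
          BITSET_SET(indirects, loc);

       return BITSET_TEST(indirects, loc);
    }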
@@ -263,7 +259,7 @@ create_indirects_mask(nir_shader *shader, uint64_t *indirects,
 
 static void
 lower_io_arrays_to_elements(nir_shader *shader, nir_variable_mode mask,
-                            uint64_t *indirects, uint64_t *patch_indirects,
+                            BITSET_WORD *indirects,
                             struct hash_table *varyings,
                             bool after_cross_stage_opts)
 {
@@ -279,24 +275,32 @@ lower_io_arrays_to_elements(nir_shader *shader, nir_variable_mode mask,
 
                nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
 
-               if (intr->intrinsic != nir_intrinsic_load_var &&
-                   intr->intrinsic != nir_intrinsic_store_var &&
-                   intr->intrinsic != nir_intrinsic_interp_var_at_centroid &&
-                   intr->intrinsic != nir_intrinsic_interp_var_at_sample &&
-                   intr->intrinsic != nir_intrinsic_interp_var_at_offset)
+               if (intr->intrinsic != nir_intrinsic_load_deref &&
+                   intr->intrinsic != nir_intrinsic_store_deref &&
+                   intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
+                   intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
+                   intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
+                   intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
+                  continue;
+
+               nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+               if (!(deref->mode & mask))
                   continue;
 
-               nir_variable *var = intr->variables[0]->var;
+               nir_variable *var = nir_deref_instr_get_variable(deref);
+
+               /* Drivers assume compact arrays are, in fact, arrays. */
+               if (var->data.compact)
+                  continue;
+
+               /* Per-view variables are expected to remain arrays. */
+               if (var->data.per_view)
+                  continue;
 
                /* Skip indirects */
-               uint64_t loc_mask = ((uint64_t)1) << var->data.location;
-               if (var->data.patch) {
-                  if (patch_indirects[var->data.location_frac] & loc_mask)
-                     continue;
-               } else {
-                  if (indirects[var->data.location_frac] & loc_mask)
-                     continue;
-               }
+               int loc = var->data.location * 4 + var->data.location_frac;
+               if (BITSET_TEST(indirects, loc))
+                  continue;
 
                nir_variable_mode mode = var->data.mode;
 
@@ -311,7 +315,7 @@ lower_io_arrays_to_elements(nir_shader *shader, nir_variable_mode mask,
                 * TODO: Add support for struct splitting.
                 */
               if ((!glsl_type_is_array(type) && !glsl_type_is_matrix(type)) ||
-                   glsl_type_is_struct(glsl_without_array(type)))
+                   glsl_type_is_struct_or_ifc(glsl_without_array(type)))
                   continue;
 
                /* Skip builtins */
@@ -327,11 +331,12 @@ lower_io_arrays_to_elements(nir_shader *shader, nir_variable_mode mask,
                   continue;
 
                switch (intr->intrinsic) {
-               case nir_intrinsic_interp_var_at_centroid:
-               case nir_intrinsic_interp_var_at_sample:
-               case nir_intrinsic_interp_var_at_offset:
-               case nir_intrinsic_load_var:
-               case nir_intrinsic_store_var:
+               case nir_intrinsic_interp_deref_at_centroid:
+               case nir_intrinsic_interp_deref_at_sample:
+               case nir_intrinsic_interp_deref_at_offset:
+               case nir_intrinsic_interp_deref_at_vertex:
+               case nir_intrinsic_load_deref:
+               case nir_intrinsic_store_deref:
                   if ((mask & nir_var_shader_in && mode == nir_var_shader_in) ||
                       (mask & nir_var_shader_out && mode == nir_var_shader_out))
                      lower_array(&b, intr, var, varyings);
@@ -346,30 +351,28 @@ lower_io_arrays_to_elements(nir_shader *shader, nir_variable_mode mask,
 }
 
 void
-nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader)
+nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader,
+                                             bool outputs_only)
 {
-   struct hash_table *split_inputs =
-      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-                              _mesa_key_pointer_equal);
-   struct hash_table *split_outputs =
-      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-                              _mesa_key_pointer_equal);
+   struct hash_table *split_inputs = _mesa_pointer_hash_table_create(NULL);
+   struct hash_table *split_outputs = _mesa_pointer_hash_table_create(NULL);
 
-   uint64_t indirects[4] = {0}, patch_indirects[4] = {0};
+   BITSET_DECLARE(indirects, 4 * VARYING_SLOT_TESS_MAX) = {0};
 
-   lower_io_arrays_to_elements(shader, nir_var_shader_out, indirects,
-                               patch_indirects, split_outputs, true);
+   lower_io_arrays_to_elements(shader, nir_var_shader_out,
+                               indirects, split_outputs, true);
 
-   lower_io_arrays_to_elements(shader, nir_var_shader_in, indirects,
-                               patch_indirects, split_inputs, true);
+   if (!outputs_only) {
+      lower_io_arrays_to_elements(shader, nir_var_shader_in,
+                                  indirects, split_inputs, true);
 
-   /* Remove old input from the shaders inputs list */
-   struct hash_entry *entry;
-   hash_table_foreach(split_inputs, entry) {
-      nir_variable *var = (nir_variable *) entry->key;
-      exec_node_remove(&var->node);
+      /* Remove old input from the shader's inputs list */
+      hash_table_foreach(split_inputs, entry) {
+         nir_variable *var = (nir_variable *) entry->key;
+         exec_node_remove(&var->node);
 
-      free(entry->data);
+         free(entry->data);
+      }
    }
 
    /* Remove old output from the shader's outputs list */
@@ -382,32 +385,28 @@ nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader)
 
    _mesa_hash_table_destroy(split_inputs, NULL);
    _mesa_hash_table_destroy(split_outputs, NULL);
+
+   nir_remove_dead_derefs(shader);
 }
 
 void
 nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer)
 {
-   struct hash_table *split_inputs =
-      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-                              _mesa_key_pointer_equal);
-   struct hash_table *split_outputs =
-      _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-                              _mesa_key_pointer_equal);
+   struct hash_table *split_inputs = _mesa_pointer_hash_table_create(NULL);
+   struct hash_table *split_outputs = _mesa_pointer_hash_table_create(NULL);
 
-   uint64_t indirects[4] = {0}, patch_indirects[4] = {0};
-   create_indirects_mask(producer, indirects, patch_indirects,
-                         nir_var_shader_out);
-   create_indirects_mask(consumer, indirects, patch_indirects,
-                         nir_var_shader_in);
+   BITSET_DECLARE(indirects, 4 * VARYING_SLOT_TESS_MAX) = {0};
 
-   lower_io_arrays_to_elements(producer, nir_var_shader_out, indirects,
-                               patch_indirects, split_outputs, false);
+   create_indirects_mask(producer, indirects, nir_var_shader_out);
+   create_indirects_mask(consumer, indirects, nir_var_shader_in);
 
-   lower_io_arrays_to_elements(consumer, nir_var_shader_in, indirects,
-                               patch_indirects, split_inputs, false);
+   lower_io_arrays_to_elements(producer, nir_var_shader_out,
+                               indirects, split_outputs, false);
+
+   lower_io_arrays_to_elements(consumer, nir_var_shader_in,
+                               indirects, split_inputs, false);
 
    /* Remove old input from the shader's inputs list */
-   struct hash_entry *entry;
    hash_table_foreach(split_inputs, entry) {
       nir_variable *var = (nir_variable *) entry->key;
       exec_node_remove(&var->node);
@@ -425,4 +424,7 @@ nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer)
 
    _mesa_hash_table_destroy(split_inputs, NULL);
    _mesa_hash_table_destroy(split_outputs, NULL);
+
+   nir_remove_dead_derefs(producer);
+   nir_remove_dead_derefs(consumer);
 }
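
Editorial note: a hedged usage sketch (not from this patch) of how callers adapt to the new entry-point signatures introduced above. The wrapper functions and call sites are hypothetical; only the two nir_lower_io_arrays_to_elements* prototypes come from the diff:

    #include "nir.h"

    /* Cross-stage case: split matching arrays/matrices on both sides of a
     * shader interface, using the indirect mask built from both stages. */
    static void
    link_varyings(nir_shader *producer, nir_shader *consumer)
    {
       nir_lower_io_arrays_to_elements(producer, consumer);
    }

    /* Single-stage case: the new outputs_only flag leaves inputs untouched. */
    static void
    lower_single_stage_outputs(nir_shader *shader)
    {
       nir_lower_io_arrays_to_elements_no_indirects(shader, true /* outputs_only */);
    }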