From 663b2e9a92f152c314f611526b8a16ff4c06249f Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 3 Oct 2016 20:32:22 -0700
Subject: [PATCH] nir: Add a "compact array" flag and IO lowering code.

Certain built-in arrays, such as gl_ClipDistance[], gl_CullDistance[],
gl_TessLevelInner[], and gl_TessLevelOuter[] are specified as scalar
arrays.  Normal scalar arrays are sparse - each array element usually
occupies a whole vec4 slot.  However, most hardware assumes these
built-in arrays are tightly packed.

The new var->data.compact flag indicates that a scalar array should
be tightly packed, so a float[4] array would take up a single vec4
slot, and a float[8] array would take up two slots.

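They are still arrays, not vec4s, however.  nir_lower_io will generate
intrinsics using ARB_enhanced_layouts style component qualifiers.
Because that lowering only handles direct indexing, indirect
dereferences of compact arrays are always lowered by
nir_lower_indirect_derefs, regardless of the requested mode mask.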

v2: Add nir_validate code to enforce type restrictions.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
---
 src/compiler/glsl/glsl_to_nir.cpp            |  1 +
 src/compiler/nir/nir.h                       |  7 ++++
 src/compiler/nir/nir_gather_info.c           |  9 ++--
 src/compiler/nir/nir_lower_indirect_derefs.c |  8 +++-
 src/compiler/nir/nir_lower_io.c              | 44 ++++++++++++++------
 src/compiler/nir/nir_print.c                 |  3 +-
 src/compiler/nir/nir_validate.c              | 13 ++++++
 7 files changed, 67 insertions(+), 18 deletions(-)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
index 90fdd694007..628f8de14b3 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -329,6 +329,7 @@ nir_visitor::visit(ir_variable *ir)
    var->data.explicit_index = ir->data.explicit_index;
    var->data.explicit_binding = ir->data.explicit_binding;
    var->data.has_initializer = ir->data.has_initializer;
+   var->data.compact = false;
    var->data.location_frac = ir->data.location_frac;
 
    switch (ir->data.depth_layout) {
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 3d463840793..68d62d98d86 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -229,6 +229,13 @@ typedef struct nir_variable {
        */
       unsigned location_frac:2;
 
+      /**
+       * If true, this variable represents an array of scalars that should
+       * be tightly packed.  In other words, consecutive array elements
+       * should be stored one component apart, rather than one slot apart.
+       */
+      bool compact:1;
+
       /**
        * Whether this is a fragment shader output implicitly initialized with
        * the previous contents of the specified render target at the
diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c
index 82452b439a6..07c99497146 100644
--- a/src/compiler/nir/nir_gather_info.c
+++ b/src/compiler/nir/nir_gather_info.c
@@ -94,8 +94,11 @@ mark_whole_variable(nir_shader *shader, nir_variable *var)
        var->data.mode == nir_var_shader_in)
       is_vertex_input = true;
 
-   set_io_mask(shader, var, 0,
-               glsl_count_attribute_slots(type, is_vertex_input));
+   /* Compact arrays start at location_frac, so count slots from there. */
+   const unsigned slots = var->data.compact
+      ? DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4)
+      : glsl_count_attribute_slots(type, is_vertex_input);
+   set_io_mask(shader, var, 0, slots);
 }
 
 static unsigned
@@ -150,7 +153,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var *deref)
     * here marking the entire variable as used.
     */
    if (!(glsl_type_is_matrix(type) ||
-         (glsl_type_is_array(type) &&
+         (glsl_type_is_array(type) && !var->data.compact &&
           (glsl_type_is_numeric(glsl_without_array(type)) ||
            glsl_type_is_boolean(glsl_without_array(type)))))) {
 
diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c
index 356373e2788..5c97dc8e5fe 100644
--- a/src/compiler/nir/nir_lower_indirect_derefs.c
+++ b/src/compiler/nir/nir_lower_indirect_derefs.c
@@ -175,8 +175,12 @@ lower_indirect_block(nir_block *block, nir_builder *b,
       if (!deref_has_indirect(intrin->variables[0]))
          continue;
 
-      /* Only lower variables whose mode is in the mask */
-      if (!(modes & intrin->variables[0]->var->data.mode))
+      /* Only lower variables whose mode is in the mask, or compact
+       * array variables.  (We can't handle indirects on tightly packed
+       * scalar arrays, so we need to lower them regardless.)
+       */
+      if (!(modes & intrin->variables[0]->var->data.mode) &&
+          !intrin->variables[0]->var->data.compact)
          continue;
 
       b->cursor = nir_before_instr(&intrin->instr);
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index a7e7f148f13..66289477adb 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -88,7 +88,8 @@ nir_is_per_vertex_io(nir_variable *var, gl_shader_stage stage)
 static nir_ssa_def *
 get_io_offset(nir_builder *b, nir_deref_var *deref,
               nir_ssa_def **vertex_index,
-              int (*type_size)(const struct glsl_type *))
+              int (*type_size)(const struct glsl_type *),
+              unsigned *component)
 {
    nir_deref *tail = &deref->deref;
 
@@ -106,6 +107,19 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
       *vertex_index = vtx;
    }
 
+   if (deref->var->data.compact) {
+      assert(tail->child->deref_type == nir_deref_type_array);
+      assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
+      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+      /* We always lower indirect dereferences for "compact" array vars. */
+      assert(deref_array->deref_array_type == nir_deref_array_type_direct);
+
+      const unsigned total_offset = *component + deref_array->base_offset;
+      const unsigned slot_offset = total_offset / 4;
+      *component = total_offset % 4;
+      return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
+   }
+
    /* Just emit code and let constant-folding go to town */
    nir_ssa_def *offset = nir_imm_int(b, 0);
 
@@ -143,7 +157,8 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
 
 static nir_intrinsic_instr *
 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
-           nir_ssa_def *vertex_index, nir_ssa_def *offset)
+           nir_ssa_def *vertex_index, nir_ssa_def *offset,
+           unsigned component)
 {
    const nir_shader *nir = state->builder.shader;
    nir_variable *var = intrin->variables[0]->var;
@@ -194,7 +209,7 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
 
    nir_intrinsic_set_base(load, var->data.driver_location);
    if (mode == nir_var_shader_in || mode == nir_var_shader_out)
-      nir_intrinsic_set_component(load, var->data.location_frac);
+      nir_intrinsic_set_component(load, component);
 
    if (load->intrinsic == nir_intrinsic_load_uniform)
       nir_intrinsic_set_range(load, state->type_size(var->type));
@@ -214,7 +229,8 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
 
 static nir_intrinsic_instr *
 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
-            nir_ssa_def *vertex_index, nir_ssa_def *offset)
+            nir_ssa_def *vertex_index, nir_ssa_def *offset,
+            unsigned component)
 {
    nir_variable *var = intrin->variables[0]->var;
    nir_variable_mode mode = var->data.mode;
@@ -236,7 +252,7 @@ lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
    nir_intrinsic_set_base(store, var->data.driver_location);
 
    if (mode == nir_var_shader_out)
-      nir_intrinsic_set_component(store, var->data.location_frac);
+      nir_intrinsic_set_component(store, component);
 
    nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
 
@@ -289,7 +305,7 @@ lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
 
 static nir_intrinsic_instr *
 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
-                     nir_ssa_def *offset)
+                     nir_ssa_def *offset, unsigned component)
 {
    nir_variable *var = intrin->variables[0]->var;
 
@@ -297,7 +313,7 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
 
    /* Ignore interpolateAt() for flat variables - flat is flat. */
    if (var->data.interpolation == INTERP_MODE_FLAT)
-      return lower_load(intrin, state, NULL, offset);
+      return lower_load(intrin, state, NULL, offset, component);
 
    nir_intrinsic_op bary_op;
    switch (intrin->intrinsic) {
@@ -333,7 +349,7 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
    load->num_components = intrin->num_components;
 
    nir_intrinsic_set_base(load, var->data.driver_location);
-   nir_intrinsic_set_component(load, var->data.location_frac);
+   nir_intrinsic_set_component(load, component);
 
    load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
    load->src[1] = nir_src_for_ssa(offset);
@@ -398,20 +414,23 @@ nir_lower_io_block(nir_block *block,
 
       nir_ssa_def *offset;
       nir_ssa_def *vertex_index = NULL;
+      unsigned component_offset = var->data.location_frac;
 
       offset = get_io_offset(b, intrin->variables[0],
                              per_vertex ? &vertex_index : NULL,
-                             state->type_size);
+                             state->type_size, &component_offset);
 
       nir_intrinsic_instr *replacement;
 
       switch (intrin->intrinsic) {
       case nir_intrinsic_load_var:
-         replacement = lower_load(intrin, state, vertex_index, offset);
+         replacement = lower_load(intrin, state, vertex_index, offset,
+                                  component_offset);
          break;
 
       case nir_intrinsic_store_var:
-         replacement = lower_store(intrin, state, vertex_index, offset);
+         replacement = lower_store(intrin, state, vertex_index, offset,
+                                   component_offset);
          break;
 
       case nir_intrinsic_var_atomic_add:
@@ -432,7 +451,8 @@ nir_lower_io_block(nir_block *block,
       case nir_intrinsic_interp_var_at_sample:
       case nir_intrinsic_interp_var_at_offset:
          assert(vertex_index == NULL);
-         replacement = lower_interpolate_at(intrin, state, offset);
+         replacement = lower_interpolate_at(intrin, state, offset,
+                                            component_offset);
          break;
 
       default:
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 242bffba472..ed0243506d5 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -432,7 +432,8 @@ print_var_decl(nir_variable *var, print_state *state)
          loc = buf;
       }
 
-      fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
+      fprintf(fp, " (%s, %u)%s", loc, var->data.driver_location,
+              var->data.compact ? " compact" : "");
    }
 
    if (var->constant_initializer) {
diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index bd73f0437f2..15ba65f2b12 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -942,6 +942,19 @@ validate_var_decl(nir_variable *var, bool is_global, validate_state *state)
    /* Must have exactly one mode set */
    validate_assert(state, util_bitcount(var->data.mode) == 1);
 
+   if (var->data.compact) {
+      /* The "compact" flag is only valid on arrays of scalars. */
+      assert(glsl_type_is_array(var->type));
+
+      const struct glsl_type *type = glsl_get_array_element(var->type);
+      if (nir_is_per_vertex_io(var, state->shader->stage)) {
+         assert(glsl_type_is_array(type));
+         assert(glsl_type_is_scalar(glsl_get_array_element(type)));
+      } else {
+         assert(glsl_type_is_scalar(type));
+      }
+   }
+
    /*
     * TODO validate some things ir_validate.cpp does (requires more GLSL type
     * support)
-- 
2.30.2