glsl/linker: Pack within compound varyings.
author Paul Berry <stereotype441@gmail.com>
Mon, 10 Dec 2012 04:59:26 +0000 (20:59 -0800)
committer Paul Berry <stereotype441@gmail.com>
Fri, 14 Dec 2012 18:51:18 +0000 (10:51 -0800)
This patch implements varying packing within varyings that are
composed of multiple vectors of size less than 4 (e.g. arrays of
vec2's, or matrices with height less than 4).

Previously, such varyings used up a full 4-wide varying slot for each
constituent vector, meaning that some of the components of each
varying slot went unused.  For example, a mat4x3 would be stored as
follows:

 <----slot1----> <----slot2----> <----slot3----> <----slot4---->  slots
  *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *
 <-column1->  x  <-column2->  x  <-column3->  x  <-column4->  x   matrix

(Each * represents a varying component, and the "x"s represent wasted
space.)  In addition to wasting precious varying components, this
layout complicated transform feedback, since the constituents of the
varying are expected to be output to the transform feedback buffer
contiguously (e.g. without gaps between the columns, in the case of a
matrix).

This change packs the constituents of each varying together so that
all wasted space is at the end.  For the mat4x3 example, this looks
like so:

 <----slot1----> <----slot2----> <----slot3----> <----slot4---->  slots
  *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *
 <-column1-> <-column2-> <-column3-> <-column4->  x   x   x   x   matrix

Note that matrix columns 2 and 3 now cross a boundary between varying
slots (a characteristic I call "double parking" of a varying).

We don't bother trying to eliminate the wasted space at the end of the
varying, since the patch that follows will take care of that.

Since compiler back-ends don't (yet) support this packed layout, the
lower_packed_varyings function is used to rewrite the shader into a
form where each varying occupies a full varying slot.  Later, if we
add native back-end support for varying packing, we can make this
lowering pass optional.

Reviewed-by: Eric Anholt <eric@anholt.net>
v2: Skip varying packing if ctx->Const.DisableVaryingPacking is true.

src/glsl/linker.cpp

index 55d23d1c34261c3d9c6585062a04297e36d92833..1ae8fad22d0c40b1a47b883d28bb903727c56a4d 100644 (file)
@@ -1600,6 +1600,17 @@ private:
     */
    int location;
 
+   /**
+    * If non-zero, then this variable may be packed along with other variables
+    * into a single varying slot, so this offset should be applied when
+    * accessing components.  For example, an offset of 1 means that the x
+    * component of this variable is actually stored in component y of the
+    * location specified by \c location.
+    *
+    * Only valid if location != -1.
+    */
+   unsigned location_frac;
+
    /**
     * If location != -1, the number of vector elements in this variable, or 1
     * if this variable is a scalar.
@@ -1739,6 +1750,8 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
       /* Array variable */
       const unsigned matrix_cols =
          output_var->type->fields.array->matrix_columns;
+      const unsigned vector_elements =
+         output_var->type->fields.array->vector_elements;
       unsigned actual_array_size = this->is_clip_distance_mesa ?
          prog->Vert.ClipDistanceArraySize : output_var->type->array_size();
 
@@ -1754,16 +1767,22 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
          if (this->is_clip_distance_mesa) {
             this->location =
                output_var->location + this->array_subscript / 4;
+            this->location_frac = this->array_subscript % 4;
          } else {
-            this->location =
-               output_var->location + this->array_subscript * matrix_cols;
+            unsigned fine_location
+               = output_var->location * 4 + output_var->location_frac;
+            unsigned array_elem_size = vector_elements * matrix_cols;
+            fine_location += array_elem_size * this->array_subscript;
+            this->location = fine_location / 4;
+            this->location_frac = fine_location % 4;
          }
          this->size = 1;
       } else {
          this->location = output_var->location;
+         this->location_frac = output_var->location_frac;
          this->size = actual_array_size;
       }
-      this->vector_elements = output_var->type->fields.array->vector_elements;
+      this->vector_elements = vector_elements;
       this->matrix_columns = matrix_cols;
       if (this->is_clip_distance_mesa)
          this->type = GL_FLOAT;
@@ -1778,6 +1797,7 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
          return false;
       }
       this->location = output_var->location;
+      this->location_frac = output_var->location_frac;
       this->size = 1;
       this->vector_elements = output_var->type->vector_elements;
       this->matrix_columns = output_var->type->matrix_columns;
@@ -1812,11 +1832,7 @@ tfeedback_decl::get_num_outputs() const
       return 0;
    }
 
-   unsigned translated_size = this->size;
-   if (this->is_clip_distance_mesa)
-      translated_size = (translated_size + 3) / 4;
-
-   return translated_size * this->matrix_columns;
+   return (this->num_components() + this->location_frac + 3)/4;
 }
 
 
@@ -1854,35 +1870,23 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
       return false;
    }
 
-   unsigned translated_size = this->size;
-   if (this->is_clip_distance_mesa)
-      translated_size = (translated_size + 3) / 4;
-   unsigned components_so_far = 0;
-   for (unsigned index = 0; index < translated_size; ++index) {
-      for (unsigned v = 0; v < this->matrix_columns; ++v) {
-         unsigned num_components = this->vector_elements;
-         assert(info->NumOutputs < max_outputs);
-         info->Outputs[info->NumOutputs].ComponentOffset = 0;
-         if (this->is_clip_distance_mesa) {
-            if (this->is_subscripted) {
-               num_components = 1;
-               info->Outputs[info->NumOutputs].ComponentOffset =
-                  this->array_subscript % 4;
-            } else {
-               num_components = MIN2(4, this->size - components_so_far);
-            }
-         }
-         info->Outputs[info->NumOutputs].OutputRegister =
-            this->location + v + index * this->matrix_columns;
-         info->Outputs[info->NumOutputs].NumComponents = num_components;
-         info->Outputs[info->NumOutputs].OutputBuffer = buffer;
-         info->Outputs[info->NumOutputs].DstOffset = info->BufferStride[buffer];
-         ++info->NumOutputs;
-         info->BufferStride[buffer] += num_components;
-         components_so_far += num_components;
-      }
+   unsigned location = this->location;
+   unsigned location_frac = this->location_frac;
+   unsigned num_components = this->num_components();
+   while (num_components > 0) {
+      unsigned output_size = MIN2(num_components, 4 - location_frac);
+      assert(info->NumOutputs < max_outputs);
+      info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
+      info->Outputs[info->NumOutputs].OutputRegister = location;
+      info->Outputs[info->NumOutputs].NumComponents = output_size;
+      info->Outputs[info->NumOutputs].OutputBuffer = buffer;
+      info->Outputs[info->NumOutputs].DstOffset = info->BufferStride[buffer];
+      ++info->NumOutputs;
+      info->BufferStride[buffer] += output_size;
+      num_components -= output_size;
+      location++;
+      location_frac = 0;
    }
-   assert(components_so_far == this->num_components());
 
    info->Varyings[info->NumVarying].Name = ralloc_strdup(prog, this->orig_name);
    info->Varyings[info->NumVarying].Type = this->type;
@@ -2330,7 +2334,7 @@ assign_varying_locations(struct gl_context *ctx,
       }
    }
 
-   matches.assign_locations();
+   const unsigned slots_used = matches.assign_locations();
    matches.store_locations(producer_base, consumer_base);
 
    for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
@@ -2344,6 +2348,21 @@ assign_varying_locations(struct gl_context *ctx,
          return false;
    }
 
+   if (ctx->Const.DisableVaryingPacking) {
+      /* Transform feedback code assumes varyings are packed, so if the driver
+       * has disabled varying packing, make sure it does not support transform
+       * feedback.
+       */
+      assert(!ctx->Extensions.EXT_transform_feedback);
+   } else {
+      lower_packed_varyings(ctx, producer_base, slots_used, ir_var_out,
+                            producer);
+      if (consumer) {
+         lower_packed_varyings(ctx, consumer_base, slots_used, ir_var_in,
+                               consumer);
+      }
+   }
+
    unsigned varying_vectors = 0;
 
    if (consumer) {