glsl: record number of components used in each slot for varying packing
authorIlia Mirkin <imirkin@alum.mit.edu>
Sat, 5 Nov 2016 13:31:21 +0000 (09:31 -0400)
committerIlia Mirkin <imirkin@alum.mit.edu>
Thu, 10 Nov 2016 01:26:48 +0000 (20:26 -0500)
Instead of packing varyings into vec4's, keep track of how many
components each slot uses and create varyings with matching types. This
ensures that we don't end up using more components than the orginal
shader, which is especially important for geometry shader output limits.

This comes up for NVIDIA hw, where the limit is 1024 output components
for a GS, and the hardware complains *loudly* if you even think about
going over.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
src/compiler/glsl/ir_optimization.h
src/compiler/glsl/link_varyings.cpp
src/compiler/glsl/lower_packed_varyings.cpp

index 6f2bc321fa78079b3ccb3258f609b31c99054ee6..8cee41843dc534665e7b52ce4bdd92149174b95f 100644 (file)
@@ -136,7 +136,9 @@ void lower_shared_reference(struct gl_linked_shader *shader,
 void lower_ubo_reference(struct gl_linked_shader *shader,
                          bool clamp_block_indices);
 void lower_packed_varyings(void *mem_ctx,
-                           unsigned locations_used, ir_variable_mode mode,
+                           unsigned locations_used,
+                           const uint8_t *components,
+                           ir_variable_mode mode,
                            unsigned gs_input_vertices,
                            gl_linked_shader *shader,
                            bool disable_varying_packing, bool xfb_enabled);
index 43204a7c3f2563a0d2ba75bf7c3a4731b308af08..87d7c91604ec588ee37d8e87657095c0c7dd44e4 100644 (file)
@@ -1217,6 +1217,7 @@ public:
    ~varying_matches();
    void record(ir_variable *producer_var, ir_variable *consumer_var);
    unsigned assign_locations(struct gl_shader_program *prog,
+                             uint8_t *components,
                              uint64_t reserved_slots);
    void store_locations() const;
 
@@ -1482,6 +1483,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
  */
 unsigned
 varying_matches::assign_locations(struct gl_shader_program *prog,
+                                  uint8_t *components,
                                   uint64_t reserved_slots)
 {
    /* If packing has been disabled then we cannot safely sort the varyings by
@@ -1585,6 +1587,12 @@ varying_matches::assign_locations(struct gl_shader_program *prog,
                       var->name);
       }
 
+      if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
+         for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
+            components[j] = 4;
+         components[slot_end / 4u] = (slot_end & 3) + 1;
+      }
+
       this->matches[i].generic_location = *location;
 
       *location = slot_end + 1;
@@ -2203,7 +2211,9 @@ assign_varying_locations(struct gl_context *ctx,
       }
    }
 
-   const unsigned slots_used = matches.assign_locations(prog, reserved_slots);
+   uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
+   const unsigned slots_used = matches.assign_locations(
+         prog, components, reserved_slots);
    matches.store_locations();
 
    for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
@@ -2263,13 +2273,13 @@ assign_varying_locations(struct gl_context *ctx,
    }
 
    if (producer) {
-      lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out,
+      lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
                             0, producer, disable_varying_packing,
                             xfb_enabled);
    }
 
    if (consumer) {
-      lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in,
+      lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
                             consumer_vertices, consumer,
                             disable_varying_packing, xfb_enabled);
    }
index 19bbe576a695dd79db32762affacdfd4a50aea24..b16f25fc93df851cc5b0e321e44fc8fa64061310 100644 (file)
@@ -164,7 +164,9 @@ namespace {
 class lower_packed_varyings_visitor
 {
 public:
-   lower_packed_varyings_visitor(void *mem_ctx, unsigned locations_used,
+   lower_packed_varyings_visitor(void *mem_ctx,
+                                 unsigned locations_used,
+                                 const uint8_t *components,
                                  ir_variable_mode mode,
                                  unsigned gs_input_vertices,
                                  exec_list *out_instructions,
@@ -203,6 +205,8 @@ private:
     */
    const unsigned locations_used;
 
+   const uint8_t* components;
+
    /**
     * Array of pointers to the packed varyings that have been created for each
     * generic varying slot.  NULL entries in this array indicate varying slots
@@ -241,12 +245,14 @@ private:
 } /* anonymous namespace */
 
 lower_packed_varyings_visitor::lower_packed_varyings_visitor(
-      void *mem_ctx, unsigned locations_used, ir_variable_mode mode,
+      void *mem_ctx, unsigned locations_used, const uint8_t *components,
+      ir_variable_mode mode,
       unsigned gs_input_vertices, exec_list *out_instructions,
       exec_list *out_variables, bool disable_varying_packing,
       bool xfb_enabled)
    : mem_ctx(mem_ctx),
      locations_used(locations_used),
+     components(components),
      packed_varyings((ir_variable **)
                      rzalloc_array_size(mem_ctx, sizeof(*packed_varyings),
                                         locations_used)),
@@ -607,10 +613,11 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
    if (this->packed_varyings[slot] == NULL) {
       char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name);
       const glsl_type *packed_type;
+      assert(components[slot] != 0);
       if (unpacked_var->is_interpolation_flat())
-         packed_type = glsl_type::ivec4_type;
+         packed_type = glsl_type::get_instance(GLSL_TYPE_INT, components[slot], 1);
       else
-         packed_type = glsl_type::vec4_type;
+         packed_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, components[slot], 1);
       if (this->gs_input_vertices != 0) {
          packed_type =
             glsl_type::get_array_instance(packed_type,
@@ -771,6 +778,7 @@ lower_packed_varyings_return_splicer::visit_leave(ir_return *ret)
 
 void
 lower_packed_varyings(void *mem_ctx, unsigned locations_used,
+                      const uint8_t *components,
                       ir_variable_mode mode, unsigned gs_input_vertices,
                       gl_linked_shader *shader, bool disable_varying_packing,
                       bool xfb_enabled)
@@ -781,7 +789,10 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
    ir_function_signature *main_func_sig
       = main_func->matching_signature(NULL, &void_parameters, false);
    exec_list new_instructions, new_variables;
-   lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode,
+   lower_packed_varyings_visitor visitor(mem_ctx,
+                                         locations_used,
+                                         components,
+                                         mode,
                                          gs_input_vertices,
                                          &new_instructions,
                                          &new_variables,