glsl: remember per-component vertex streams for packed varyings
[mesa.git] / src / compiler / glsl / lower_packed_varyings.cpp
index 825cc9ee8cd3e7518fefa1ae2c4613db4fc214dd..7a2f187229b611e84675d62ae37f245cad7c704f 100644 (file)
@@ -164,7 +164,9 @@ namespace {
 class lower_packed_varyings_visitor
 {
 public:
-   lower_packed_varyings_visitor(void *mem_ctx, unsigned locations_used,
+   lower_packed_varyings_visitor(void *mem_ctx,
+                                 unsigned locations_used,
+                                 const uint8_t *components,
                                  ir_variable_mode mode,
                                  unsigned gs_input_vertices,
                                  exec_list *out_instructions,
@@ -172,7 +174,7 @@ public:
                                  bool disable_varying_packing,
                                  bool xfb_enabled);
 
-   void run(struct gl_shader *shader);
+   void run(struct gl_linked_shader *shader);
 
 private:
    void bitwise_assign_pack(ir_rvalue *lhs, ir_rvalue *rhs);
@@ -203,6 +205,8 @@ private:
     */
    const unsigned locations_used;
 
+   const uint8_t* components;
+
    /**
     * Array of pointers to the packed varyings that have been created for each
     * generic varying slot.  NULL entries in this array indicate varying slots
@@ -241,12 +245,14 @@ private:
 } /* anonymous namespace */
 
 lower_packed_varyings_visitor::lower_packed_varyings_visitor(
-      void *mem_ctx, unsigned locations_used, ir_variable_mode mode,
+      void *mem_ctx, unsigned locations_used, const uint8_t *components,
+      ir_variable_mode mode,
       unsigned gs_input_vertices, exec_list *out_instructions,
       exec_list *out_variables, bool disable_varying_packing,
       bool xfb_enabled)
    : mem_ctx(mem_ctx),
      locations_used(locations_used),
+     components(components),
      packed_varyings((ir_variable **)
                      rzalloc_array_size(mem_ctx, sizeof(*packed_varyings),
                                         locations_used)),
@@ -260,7 +266,7 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
 }
 
 void
-lower_packed_varyings_visitor::run(struct gl_shader *shader)
+lower_packed_varyings_visitor::run(struct gl_linked_shader *shader)
 {
    foreach_in_list(ir_instruction, node, shader->ir) {
       ir_variable *var = node->as_variable();
@@ -273,11 +279,11 @@ lower_packed_varyings_visitor::run(struct gl_shader *shader)
          continue;
 
       /* This lowering pass is only capable of packing floats and ints
-       * together when their interpolation mode is "flat".  Therefore, to be
-       * safe, caller should ensure that integral varyings always use flat
-       * interpolation, even when this is not required by GLSL.
+       * together when their interpolation mode is "flat".  Treat integers as
+       * being flat when the interpolation mode is none.
        */
-      assert(var->data.interpolation == INTERP_QUALIFIER_FLAT ||
+      assert(var->data.interpolation == INTERP_MODE_FLAT ||
+             var->data.interpolation == INTERP_MODE_NONE ||
              !var->type->contains_integer());
 
       /* Clone the variable for program resource list before
@@ -432,7 +438,7 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
                                             bool gs_input_toplevel,
                                             unsigned vertex_index)
 {
-   unsigned dmul = rvalue->type->is_double() ? 2 : 1;
+   unsigned dmul = rvalue->type->is_64bit() ? 2 : 1;
    /* When gs_input_toplevel is set, we should be looking at a geometry shader
     * input array.
     */
@@ -480,7 +486,7 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
       char right_swizzle_name[4] = { 0, 0, 0, 0 };
 
       left_components = 4 - fine_location % 4;
-      if (rvalue->type->is_double()) {
+      if (rvalue->type->is_64bit()) {
          /* We might actually end up with 0 left components! */
          left_components /= 2;
       }
@@ -525,6 +531,14 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
       ir_dereference *packed_deref =
          this->get_packed_varying_deref(location, unpacked_var, name,
                                         vertex_index);
+      if (unpacked_var->data.stream != 0) {
+         assert(unpacked_var->data.stream < 4);
+         ir_variable *packed_var = packed_deref->variable_referenced();
+         for (unsigned i = 0; i < components; ++i) {
+            packed_var->data.stream |=
+               unpacked_var->data.stream << (2 * (location_frac + i));
+         }
+      }
       ir_swizzle *swizzle = new(this->mem_ctx)
          ir_swizzle(packed_deref, swizzle_values, components);
       if (this->mode == ir_var_shader_out) {
@@ -607,10 +621,11 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
    if (this->packed_varyings[slot] == NULL) {
       char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name);
       const glsl_type *packed_type;
-      if (unpacked_var->data.interpolation == INTERP_QUALIFIER_FLAT)
-         packed_type = glsl_type::ivec4_type;
+      assert(components[slot] != 0);
+      if (unpacked_var->is_interpolation_flat())
+         packed_type = glsl_type::get_instance(GLSL_TYPE_INT, components[slot], 1);
       else
-         packed_type = glsl_type::vec4_type;
+         packed_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, components[slot], 1);
       if (this->gs_input_vertices != 0) {
          packed_type =
             glsl_type::get_array_instance(packed_type,
@@ -627,10 +642,12 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
       packed_var->data.centroid = unpacked_var->data.centroid;
       packed_var->data.sample = unpacked_var->data.sample;
       packed_var->data.patch = unpacked_var->data.patch;
-      packed_var->data.interpolation = unpacked_var->data.interpolation;
+      packed_var->data.interpolation = packed_type == glsl_type::ivec4_type
+         ? unsigned(INTERP_MODE_FLAT) : unpacked_var->data.interpolation;
       packed_var->data.location = location;
       packed_var->data.precision = unpacked_var->data.precision;
       packed_var->data.always_active_io = unpacked_var->data.always_active_io;
+      packed_var->data.stream = 1u << 31;
       unpacked_var->insert_before(packed_var);
       this->packed_varyings[slot] = packed_var;
    } else {
@@ -638,8 +655,12 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
        * first time we visit each component.
        */
       if (this->gs_input_vertices == 0 || vertex_index == 0) {
-         ralloc_asprintf_append((char **) &this->packed_varyings[slot]->name,
-                                ",%s", name);
+         ir_variable *var = this->packed_varyings[slot];
+
+         if (var->is_name_ralloced())
+            ralloc_asprintf_append((char **) &var->name, ",%s", name);
+         else
+            var->name = ralloc_asprintf(var, "%s,%s", var->name, name);
       }
    }
 
@@ -676,7 +697,7 @@ lower_packed_varyings_visitor::needs_lowering(ir_variable *var)
       return false;
 
    type = type->without_array();
-   if (type->vector_elements == 4 && !type->is_double())
+   if (type->vector_elements == 4 && !type->is_64bit())
       return false;
    return true;
 }
@@ -724,11 +745,51 @@ lower_packed_varyings_gs_splicer::visit_leave(ir_emit_vertex *ev)
    return visit_continue;
 }
 
+/**
+ * Hierarchical IR visitor that copies a list of varying packing instructions
+ * in front of every return statement it leaves.  The list is borrowed: only
+ * a pointer to it is stored.
+ */
+class lower_packed_varyings_return_splicer : public ir_hierarchical_visitor
+{
+public:
+   /** Remembers the allocation context and the instruction list pointer. */
+   explicit lower_packed_varyings_return_splicer(
+      void *mem_ctx, const exec_list *instructions);
+
+   /** Clones the stored instructions ahead of the given return. */
+   virtual ir_visitor_status visit_leave(ir_return *ret);
+
+private:
+   /** Memory context the cloned instructions are allocated from. */
+   void *const mem_ctx;
+
+   /** Instructions to clone before each return; never modified here. */
+   const exec_list *instructions;
+};
+
+
+lower_packed_varyings_return_splicer::lower_packed_varyings_return_splicer(
+      void *mem_ctx, const exec_list *instructions)
+   : mem_ctx(mem_ctx),            /* context later passed to clone() */
+     instructions(instructions)   /* pointer is kept; the list is not copied */
+{}
+
+
+ir_visitor_status
+lower_packed_varyings_return_splicer::visit_leave(ir_return *ret)
+{
+   /* Put a fresh clone of each stored instruction in front of this return. */
+   foreach_in_list(ir_instruction, packing_ir, instructions)
+      ret->insert_before(packing_ir->clone(mem_ctx, NULL));
+   return visit_continue;
+}
 
 void
 lower_packed_varyings(void *mem_ctx, unsigned locations_used,
+                      const uint8_t *components,
                       ir_variable_mode mode, unsigned gs_input_vertices,
-                      gl_shader *shader, bool disable_varying_packing,
+                      gl_linked_shader *shader, bool disable_varying_packing,
                       bool xfb_enabled)
 {
    exec_list *instructions = shader->ir;
@@ -737,7 +798,10 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
    ir_function_signature *main_func_sig
       = main_func->matching_signature(NULL, &void_parameters, false);
    exec_list new_instructions, new_variables;
-   lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode,
+   lower_packed_varyings_visitor visitor(mem_ctx,
+                                         locations_used,
+                                         components,
+                                         mode,
                                          gs_input_vertices,
                                          &new_instructions,
                                          &new_variables,
@@ -752,20 +816,31 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
          lower_packed_varyings_gs_splicer splicer(mem_ctx, &new_instructions);
 
          /* Add all the variables in first. */
-         main_func_sig->body.head->insert_before(&new_variables);
+         main_func_sig->body.get_head_raw()->insert_before(&new_variables);
 
          /* Now update all the EmitVertex instances */
          splicer.run(instructions);
       } else {
-         /* For other shader types, outputs need to be lowered at the end of
-          * main()
+         /* For other shader types, outputs need to be lowered before each
+          * return statement and at the end of main()
+          */
+
+         lower_packed_varyings_return_splicer splicer(mem_ctx, &new_instructions);
+
+         main_func_sig->body.get_head_raw()->insert_before(&new_variables);
+
+         splicer.run(instructions);
+
+         /* Lower outputs at the end of main() if the last instruction is not
+          * a return statement
           */
-         main_func_sig->body.append_list(&new_variables);
-         main_func_sig->body.append_list(&new_instructions);
+         if (((ir_instruction*)instructions->get_tail())->ir_type != ir_type_return) {
+            main_func_sig->body.append_list(&new_instructions);
+         }
       }
    } else {
       /* Shader inputs need to be lowered at the beginning of main() */
-      main_func_sig->body.head->insert_before(&new_instructions);
-      main_func_sig->body.head->insert_before(&new_variables);
+      main_func_sig->body.get_head_raw()->insert_before(&new_instructions);
+      main_func_sig->body.get_head_raw()->insert_before(&new_variables);
    }
 }