glsl: fix the type of ir_constant_data::u16
[mesa.git] / src / compiler / glsl / lower_packed_varyings.cpp
index 825cc9ee8cd3e7518fefa1ae2c4613db4fc214dd..9c418ebae63afd92cbb362730c6b9b1955c73c3d 100644 (file)
 #include "ir_builder.h"
 #include "ir_optimization.h"
 #include "program/prog_instruction.h"
+#include "main/mtypes.h"
 
 using namespace ir_builder;
 
@@ -164,15 +165,18 @@ namespace {
 class lower_packed_varyings_visitor
 {
 public:
-   lower_packed_varyings_visitor(void *mem_ctx, unsigned locations_used,
+   lower_packed_varyings_visitor(void *mem_ctx,
+                                 unsigned locations_used,
+                                 const uint8_t *components,
                                  ir_variable_mode mode,
                                  unsigned gs_input_vertices,
                                  exec_list *out_instructions,
                                  exec_list *out_variables,
                                  bool disable_varying_packing,
+                                 bool disable_xfb_packing,
                                  bool xfb_enabled);
 
-   void run(struct gl_shader *shader);
+   void run(struct gl_linked_shader *shader);
 
 private:
    void bitwise_assign_pack(ir_rvalue *lhs, ir_rvalue *rhs);
@@ -203,6 +207,8 @@ private:
     */
    const unsigned locations_used;
 
+   const uint8_t* components;
+
    /**
     * Array of pointers to the packed varyings that have been created for each
     * generic varying slot.  NULL entries in this array indicate varying slots
@@ -235,18 +241,21 @@ private:
    exec_list *out_variables;
 
    bool disable_varying_packing;
+   bool disable_xfb_packing;
    bool xfb_enabled;
 };
 
 } /* anonymous namespace */
 
 lower_packed_varyings_visitor::lower_packed_varyings_visitor(
-      void *mem_ctx, unsigned locations_used, ir_variable_mode mode,
+      void *mem_ctx, unsigned locations_used, const uint8_t *components,
+      ir_variable_mode mode,
       unsigned gs_input_vertices, exec_list *out_instructions,
       exec_list *out_variables, bool disable_varying_packing,
-      bool xfb_enabled)
+      bool disable_xfb_packing, bool xfb_enabled)
    : mem_ctx(mem_ctx),
      locations_used(locations_used),
+     components(components),
      packed_varyings((ir_variable **)
                      rzalloc_array_size(mem_ctx, sizeof(*packed_varyings),
                                         locations_used)),
@@ -255,12 +264,13 @@ lower_packed_varyings_visitor::lower_packed_varyings_visitor(
      out_instructions(out_instructions),
      out_variables(out_variables),
      disable_varying_packing(disable_varying_packing),
+     disable_xfb_packing(disable_xfb_packing),
      xfb_enabled(xfb_enabled)
 {
 }
 
 void
-lower_packed_varyings_visitor::run(struct gl_shader *shader)
+lower_packed_varyings_visitor::run(struct gl_linked_shader *shader)
 {
    foreach_in_list(ir_instruction, node, shader->ir) {
       ir_variable *var = node->as_variable();
@@ -273,11 +283,11 @@ lower_packed_varyings_visitor::run(struct gl_shader *shader)
          continue;
 
       /* This lowering pass is only capable of packing floats and ints
-       * together when their interpolation mode is "flat".  Therefore, to be
-       * safe, caller should ensure that integral varyings always use flat
-       * interpolation, even when this is not required by GLSL.
+       * together when their interpolation mode is "flat".  Treat integers as
+       * being flat when the interpolation mode is none.
        */
-      assert(var->data.interpolation == INTERP_QUALIFIER_FLAT ||
+      assert(var->data.interpolation == INTERP_MODE_FLAT ||
+             var->data.interpolation == INTERP_MODE_NONE ||
              !var->type->contains_integer());
 
       /* Clone the variable for program resource list before
@@ -345,6 +355,44 @@ lower_packed_varyings_visitor::bitwise_assign_pack(ir_rvalue *lhs,
             rhs = u2i(expr(ir_unop_unpack_double_2x32, rhs));
          }
          break;
+      case GLSL_TYPE_INT64:
+         assert(rhs->type->vector_elements <= 2);
+         if (rhs->type->vector_elements == 2) {
+            ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "pack", ir_var_temporary);
+
+            assert(lhs->type->vector_elements == 4);
+            this->out_variables->push_tail(t);
+            this->out_instructions->push_tail(
+               assign(t, expr(ir_unop_unpack_int_2x32, swizzle_x(rhs->clone(mem_ctx, NULL))), 0x3));
+            this->out_instructions->push_tail(
+               assign(t,  expr(ir_unop_unpack_int_2x32, swizzle_y(rhs)), 0xc));
+            rhs = deref(t).val;
+         } else {
+            rhs = expr(ir_unop_unpack_int_2x32, rhs);
+         }
+         break;
+      case GLSL_TYPE_UINT64:
+         assert(rhs->type->vector_elements <= 2);
+         if (rhs->type->vector_elements == 2) {
+            ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "pack", ir_var_temporary);
+
+            assert(lhs->type->vector_elements == 4);
+            this->out_variables->push_tail(t);
+            this->out_instructions->push_tail(
+                  assign(t, u2i(expr(ir_unop_unpack_uint_2x32, swizzle_x(rhs->clone(mem_ctx, NULL)))), 0x3));
+            this->out_instructions->push_tail(
+                  assign(t,  u2i(expr(ir_unop_unpack_uint_2x32, swizzle_y(rhs))), 0xc));
+            rhs = deref(t).val;
+         } else {
+            rhs = u2i(expr(ir_unop_unpack_uint_2x32, rhs));
+         }
+         break;
+      case GLSL_TYPE_SAMPLER:
+         rhs = u2i(expr(ir_unop_unpack_sampler_2x32, rhs));
+         break;
+      case GLSL_TYPE_IMAGE:
+         rhs = u2i(expr(ir_unop_unpack_image_2x32, rhs));
+         break;
       default:
          assert(!"Unexpected type conversion while lowering varyings");
          break;
@@ -394,6 +442,44 @@ lower_packed_varyings_visitor::bitwise_assign_unpack(ir_rvalue *lhs,
             rhs = expr(ir_unop_pack_double_2x32, i2u(rhs));
          }
          break;
+      case GLSL_TYPE_INT64:
+         assert(lhs->type->vector_elements <= 2);
+         if (lhs->type->vector_elements == 2) {
+            ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "unpack", ir_var_temporary);
+            assert(rhs->type->vector_elements == 4);
+            this->out_variables->push_tail(t);
+            this->out_instructions->push_tail(
+                  assign(t, expr(ir_unop_pack_int_2x32, swizzle_xy(rhs->clone(mem_ctx, NULL))), 0x1));
+            this->out_instructions->push_tail(
+                  assign(t, expr(ir_unop_pack_int_2x32, swizzle(rhs->clone(mem_ctx, NULL), SWIZZLE_ZWZW, 2)), 0x2));
+            rhs = deref(t).val;
+         } else {
+            rhs = expr(ir_unop_pack_int_2x32, rhs);
+         }
+         break;
+      case GLSL_TYPE_UINT64:
+         assert(lhs->type->vector_elements <= 2);
+         if (lhs->type->vector_elements == 2) {
+            ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "unpack", ir_var_temporary);
+            assert(rhs->type->vector_elements == 4);
+            this->out_variables->push_tail(t);
+            this->out_instructions->push_tail(
+                  assign(t, expr(ir_unop_pack_uint_2x32, i2u(swizzle_xy(rhs->clone(mem_ctx, NULL)))), 0x1));
+            this->out_instructions->push_tail(
+                  assign(t, expr(ir_unop_pack_uint_2x32, i2u(swizzle(rhs->clone(mem_ctx, NULL), SWIZZLE_ZWZW, 2))), 0x2));
+            rhs = deref(t).val;
+         } else {
+            rhs = expr(ir_unop_pack_uint_2x32, i2u(rhs));
+         }
+         break;
+      case GLSL_TYPE_SAMPLER:
+         rhs = new(mem_ctx)
+            ir_expression(ir_unop_pack_sampler_2x32, lhs->type, i2u(rhs));
+         break;
+      case GLSL_TYPE_IMAGE:
+         rhs = new(mem_ctx)
+            ir_expression(ir_unop_pack_image_2x32, lhs->type, i2u(rhs));
+         break;
       default:
          assert(!"Unexpected type conversion while lowering varyings");
          break;
@@ -432,13 +518,13 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
                                             bool gs_input_toplevel,
                                             unsigned vertex_index)
 {
-   unsigned dmul = rvalue->type->is_double() ? 2 : 1;
+   unsigned dmul = rvalue->type->is_64bit() ? 2 : 1;
    /* When gs_input_toplevel is set, we should be looking at a geometry shader
     * input array.
     */
    assert(!gs_input_toplevel || rvalue->type->is_array());
 
-   if (rvalue->type->is_record()) {
+   if (rvalue->type->is_struct()) {
       for (unsigned i = 0; i < rvalue->type->length; i++) {
          if (i != 0)
             rvalue = rvalue->clone(this->mem_ctx, NULL);
@@ -480,7 +566,7 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
       char right_swizzle_name[4] = { 0, 0, 0, 0 };
 
       left_components = 4 - fine_location % 4;
-      if (rvalue->type->is_double()) {
+      if (rvalue->type->is_64bit()) {
          /* We might actually end up with 0 left components! */
          left_components /= 2;
       }
@@ -525,6 +611,14 @@ lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue,
       ir_dereference *packed_deref =
          this->get_packed_varying_deref(location, unpacked_var, name,
                                         vertex_index);
+      if (unpacked_var->data.stream != 0) {
+         assert(unpacked_var->data.stream < 4);
+         ir_variable *packed_var = packed_deref->variable_referenced();
+         for (unsigned i = 0; i < components; ++i) {
+            packed_var->data.stream |=
+               unpacked_var->data.stream << (2 * (location_frac + i));
+         }
+      }
       ir_swizzle *swizzle = new(this->mem_ctx)
          ir_swizzle(packed_deref, swizzle_values, components);
       if (this->mode == ir_var_shader_out) {
@@ -607,10 +701,11 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
    if (this->packed_varyings[slot] == NULL) {
       char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name);
       const glsl_type *packed_type;
-      if (unpacked_var->data.interpolation == INTERP_QUALIFIER_FLAT)
-         packed_type = glsl_type::ivec4_type;
+      assert(components[slot] != 0);
+      if (unpacked_var->is_interpolation_flat())
+         packed_type = glsl_type::get_instance(GLSL_TYPE_INT, components[slot], 1);
       else
-         packed_type = glsl_type::vec4_type;
+         packed_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, components[slot], 1);
       if (this->gs_input_vertices != 0) {
          packed_type =
             glsl_type::get_array_instance(packed_type,
@@ -627,19 +722,31 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
       packed_var->data.centroid = unpacked_var->data.centroid;
       packed_var->data.sample = unpacked_var->data.sample;
       packed_var->data.patch = unpacked_var->data.patch;
-      packed_var->data.interpolation = unpacked_var->data.interpolation;
+      packed_var->data.interpolation =
+         packed_type->without_array() == glsl_type::ivec4_type
+         ? unsigned(INTERP_MODE_FLAT) : unpacked_var->data.interpolation;
       packed_var->data.location = location;
       packed_var->data.precision = unpacked_var->data.precision;
       packed_var->data.always_active_io = unpacked_var->data.always_active_io;
+      packed_var->data.stream = 1u << 31;
       unpacked_var->insert_before(packed_var);
       this->packed_varyings[slot] = packed_var;
    } else {
+      ir_variable *var = this->packed_varyings[slot];
+
+      /* The slot needs to be marked as always active if any variable that got
+       * packed there was.
+       */
+      var->data.always_active_io |= unpacked_var->data.always_active_io;
+
       /* For geometry shader inputs, only update the packed variable name the
        * first time we visit each component.
        */
       if (this->gs_input_vertices == 0 || vertex_index == 0) {
-         ralloc_asprintf_append((char **) &this->packed_varyings[slot]->name,
-                                ",%s", name);
+         if (var->is_name_ralloced())
+            ralloc_asprintf_append((char **) &var->name, ",%s", name);
+         else
+            var->name = ralloc_asprintf(var, "%s,%s", var->name, name);
       }
    }
 
@@ -658,25 +765,35 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
 bool
 lower_packed_varyings_visitor::needs_lowering(ir_variable *var)
 {
-   /* Things composed of vec4's and varyings with explicitly assigned
-    * locations don't need lowering.  Everything else does.
+   /* Things composed of vec4's, varyings with explicitly assigned
+    * locations or varyings marked as must_be_shader_input (which might be used
+    * by interpolateAt* functions) shouldn't be lowered. Everything else can be.
     */
-   if (var->data.explicit_location)
+   if (var->data.explicit_location || var->data.must_be_shader_input)
+      return false;
+
+   const glsl_type *type = var->type;
+
+   /* Some drivers (e.g. panfrost) don't support packing of transform
+    * feedback varyings.  Only xfb varyings that are not arrays, structs or
+    * matrices are rejected here, and only while transform feedback is
+    * enabled; everything else falls through to the checks below.
+    */
+   if (disable_xfb_packing && var->data.is_xfb &&
+       !(type->is_array() || type->is_struct() || type->is_matrix()) &&
+       xfb_enabled)
       return false;
 
    /* Override disable_varying_packing if the var is only used by transform
     * feedback. Also override it if transform feedback is enabled and the
     * variable is an array, struct or matrix as the elements of these types
-    * will always has the same interpolation and therefore asre safe to pack.
+    * will always have the same interpolation and therefore are safe to pack.
     */
-   const glsl_type *type = var->type;
    if (disable_varying_packing && !var->data.is_xfb_only &&
-       !((type->is_array() || type->is_record() || type->is_matrix()) &&
+       !((type->is_array() || type->is_struct() || type->is_matrix()) &&
          xfb_enabled))
       return false;
 
    type = type->without_array();
-   if (type->vector_elements == 4 && !type->is_double())
+   if (type->vector_elements == 4 && !type->is_64bit())
       return false;
    return true;
 }
@@ -724,12 +841,52 @@ lower_packed_varyings_gs_splicer::visit_leave(ir_emit_vertex *ev)
    return visit_continue;
 }
 
+/**
+ * Visitor that splices varying packing code before every return.
+ *
+ * The visitor only stores a pointer to the instruction list it is handed.
+ * visit_leave() inserts a clone of each listed instruction in front of the
+ * ir_return it is given, so the list itself is read but never consumed.
+ */
+class lower_packed_varyings_return_splicer : public ir_hierarchical_visitor
+{
+public:
+   explicit lower_packed_varyings_return_splicer(void *mem_ctx,
+                                                 const exec_list *instructions);
+
+   virtual ir_visitor_status visit_leave(ir_return *ret);
+
+private:
+   /**
+    * Memory context used to allocate new instructions for the shader.
+    * It is the context passed to clone() for every spliced instruction.
+    */
+   void * const mem_ctx;
+
+   /**
+    * Instructions that should be spliced into place before each return.
+    * Held by pointer to const: the visitor clones from this list and does
+    * not modify it.
+    */
+   const exec_list *instructions;
+};
+
+
+lower_packed_varyings_return_splicer::lower_packed_varyings_return_splicer(
+      void *mem_ctx, const exec_list *instructions)
+   : mem_ctx(mem_ctx), instructions(instructions)
+{ /* Nothing else to set up: both pointers are stored as given and are only
+   * used later, from visit_leave().
+   */
+}
+
+
+ir_visitor_status
+lower_packed_varyings_return_splicer::visit_leave(ir_return *ret)
+{ /* Puts a clone of every instruction in this->instructions in front of
+   * `ret`.  The list is walked front to back and each clone is inserted
+   * directly before `ret`, so the clones keep the list's order.
+   */
+   foreach_in_list(ir_instruction, ir, this->instructions) {
+      ret->insert_before(ir->clone(this->mem_ctx, NULL)); /* clone, so the same list can be spliced before another return */
+   }
+   return visit_continue; /* presumably lets the traversal carry on to any remaining returns -- confirm against ir_hierarchical_visitor */
+}
 
 void
 lower_packed_varyings(void *mem_ctx, unsigned locations_used,
+                      const uint8_t *components,
                       ir_variable_mode mode, unsigned gs_input_vertices,
-                      gl_shader *shader, bool disable_varying_packing,
-                      bool xfb_enabled)
+                      gl_linked_shader *shader, bool disable_varying_packing,
+                      bool disable_xfb_packing, bool xfb_enabled)
 {
    exec_list *instructions = shader->ir;
    ir_function *main_func = shader->symbols->get_function("main");
@@ -737,11 +894,15 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
    ir_function_signature *main_func_sig
       = main_func->matching_signature(NULL, &void_parameters, false);
    exec_list new_instructions, new_variables;
-   lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode,
+   lower_packed_varyings_visitor visitor(mem_ctx,
+                                         locations_used,
+                                         components,
+                                         mode,
                                          gs_input_vertices,
                                          &new_instructions,
                                          &new_variables,
                                          disable_varying_packing,
+                                         disable_xfb_packing,
                                          xfb_enabled);
    visitor.run(shader);
    if (mode == ir_var_shader_out) {
@@ -752,20 +913,31 @@ lower_packed_varyings(void *mem_ctx, unsigned locations_used,
          lower_packed_varyings_gs_splicer splicer(mem_ctx, &new_instructions);
 
          /* Add all the variables in first. */
-         main_func_sig->body.head->insert_before(&new_variables);
+         main_func_sig->body.get_head_raw()->insert_before(&new_variables);
 
          /* Now update all the EmitVertex instances */
          splicer.run(instructions);
       } else {
-         /* For other shader types, outputs need to be lowered at the end of
-          * main()
+         /* For other shader types, outputs need to be lowered before each
+          * return statement and at the end of main()
+          */
+
+         lower_packed_varyings_return_splicer splicer(mem_ctx, &new_instructions);
+
+         main_func_sig->body.get_head_raw()->insert_before(&new_variables);
+
+         splicer.run(instructions);
+
+         /* Lower outputs at the end of main() if the last instruction is not
+          * a return statement
           */
-         main_func_sig->body.append_list(&new_variables);
-         main_func_sig->body.append_list(&new_instructions);
+         if (((ir_instruction*)instructions->get_tail())->ir_type != ir_type_return) {
+            main_func_sig->body.append_list(&new_instructions);
+         }
       }
    } else {
       /* Shader inputs need to be lowered at the beginning of main() */
-      main_func_sig->body.head->insert_before(&new_instructions);
-      main_func_sig->body.head->insert_before(&new_variables);
+      main_func_sig->body.get_head_raw()->insert_before(&new_instructions);
+      main_func_sig->body.get_head_raw()->insert_before(&new_variables);
    }
 }