glsl: lower builtins to mediump that always return mediump or lowp

[mesa.git] / src / compiler / glsl / lower_precision.cpp
diff --git a/src/compiler/glsl/lower_precision.cpp b/src/compiler/glsl/lower_precision.cpp

index 11ae598920c2aa0a946a3dbb4fd9b3aae875fbe8..7fb7257f2ab08a25881fccb5353184d8dcfc7003 100644 (file)
--- a/src/compiler/glsl/lower_precision.cpp
+++ b/src/compiler/glsl/lower_precision.cpp
@@ -49,8 +49,6 @@ public:
  
     ir_function_signature *map_builtin(ir_function_signature *sig);
  
-   bool progress;
-
     /* Set of rvalues that can be lowered. This will be filled in by
      * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
      * will be added to this set.
@@ -107,7 +105,6 @@ public:
  
     find_lowerable_rvalues_visitor(struct set *result,
                                    const struct gl_shader_compiler_options *options);
-   bool can_lower_type(const glsl_type *type) const;
  
     static void stack_enter(class ir_instruction *ir, void *data);
     static void stack_leave(class ir_instruction *ir, void *data);
@@ -147,8 +144,9 @@ public:
     virtual ir_visitor_status visit_leave(ir_expression *);
  };
  
-bool
-find_lowerable_rvalues_visitor::can_lower_type(const glsl_type *type) const
+static bool
+can_lower_type(const struct gl_shader_compiler_options *options,
+               const glsl_type *type)
  {
     /* Don’t lower any expressions involving non-float types except bool and
      * texture samplers. This will rule out operations that change the type such
@@ -290,7 +288,7 @@ enum find_lowerable_rvalues_visitor::can_lower_state
  find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
                                                   int precision) const
  {
-   if (!can_lower_type(type))
+   if (!can_lower_type(options, type))
        return CANT_LOWER;
  
     switch (precision) {
@@ -332,7 +330,7 @@ find_lowerable_rvalues_visitor::visit(ir_constant *ir)
  {
     stack_enter(ir, this);
  
-   if (!can_lower_type(ir->type))
+   if (!can_lower_type(options, ir->type))
        stack.back().state = CANT_LOWER;
  
     stack_leave(ir, this);
@@ -380,14 +378,11 @@ find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
  {
     ir_hierarchical_visitor::visit_enter(ir);
  
-   if (stack.back().state == UNKNOWN) {
-      /* The precision of the sample value depends on the precision of the
-       * sampler.
-       */
-      stack.back().state = handle_precision(ir->type,
-                                            ir->sampler->precision());
-   }
-
+   /* The precision of the sample value depends on the precision of the
+    * sampler.
+    */
+   stack.back().state = handle_precision(ir->type,
+                                         ir->sampler->precision());
     return visit_continue;
  }
  
@@ -396,7 +391,7 @@ find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
  {
     ir_hierarchical_visitor::visit_enter(ir);
  
-   if (!can_lower_type(ir->type))
+   if (!can_lower_type(options, ir->type))
        stack.back().state = CANT_LOWER;
  
     /* Don't lower precision for derivative calculations */
@@ -413,6 +408,17 @@ find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
     return visit_continue;
  }
  
+/* Name-based test for builtins whose result is always mediump or lowp. */
+static bool
+function_always_returns_mediump_or_lowp(const char *name)
+{
+   static const char *const names[] = { "bitCount", "findLSB", "findMSB",
+      "unpackHalf2x16", "unpackUnorm4x8", "unpackSnorm4x8", NULL };
+   const char *const *entry = names;
+   while (*entry != NULL && strcmp(name, *entry) != 0) ++entry;
+   return *entry != NULL;
+}
+
  static bool
  is_lowerable_builtin(ir_call *ir,
                       const struct set *lowerable_rvalues)
@@ -462,20 +468,75 @@ is_lowerable_builtin(ir_call *ir,
         * uses lower precision. The function parameters don't matter.
         */
        if (var && var->type->without_array()->is_sampler()) {
+         /* textureSize always returns highp. */
+         if (!strcmp(ir->callee_name(), "textureSize"))
+            return false;
+
           return var->data.precision == GLSL_PRECISION_MEDIUM ||
                  var->data.precision == GLSL_PRECISION_LOW;
        }
     }
  
-   if (!ir->callee->is_builtin())
+   if (!ir->callee->is_builtin() ||
+       /* Parameters are always highp: */
+       !strcmp(ir->callee_name(), "floatBitsToInt") ||
+       !strcmp(ir->callee_name(), "floatBitsToUint") ||
+       !strcmp(ir->callee_name(), "intBitsToFloat") ||
+       !strcmp(ir->callee_name(), "uintBitsToFloat") ||
+       !strcmp(ir->callee_name(), "bitfieldReverse") ||
+       !strcmp(ir->callee_name(), "frexp") ||
+       !strcmp(ir->callee_name(), "ldexp") ||
+       /* Parameters and outputs are always highp: */
+       /* TODO: The operations are highp, but carry and borrow outputs are lowp. */
+       !strcmp(ir->callee_name(), "uaddCarry") ||
+       !strcmp(ir->callee_name(), "usubBorrow") ||
+       !strcmp(ir->callee_name(), "imulExtended") ||
+       !strcmp(ir->callee_name(), "umulExtended") ||
+       !strcmp(ir->callee_name(), "unpackUnorm2x16") ||
+       !strcmp(ir->callee_name(), "unpackSnorm2x16") ||
+       /* Outputs are highp: */
+       !strcmp(ir->callee_name(), "packUnorm2x16") ||
+       !strcmp(ir->callee_name(), "packSnorm2x16") ||
+       /* Parameters are mediump and outputs are highp. The parameters should
+        * be optimized in NIR, not here, e.g:
+        * - packHalf2x16 can just be a bitcast from f16vec2 to uint32
+        * - Other opcodes don't have to convert parameters to highp if the hw
+        *   has f16 versions. Optimize in NIR accordingly.
+        */
+       !strcmp(ir->callee_name(), "packHalf2x16") ||
+       !strcmp(ir->callee_name(), "packUnorm4x8") ||
+       !strcmp(ir->callee_name(), "packSnorm4x8"))
        return false;
  
     assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
  
+   /* Number of leading actual parameters that must themselves be lowerable. */
+   unsigned check_parameters = ir->actual_parameters.length();
+
+   /* Interpolation functions only consider the precision of the interpolant. */
+   /* Bitfield functions ignore the precision of "offset" and "bits". */
+   if (!strcmp(ir->callee_name(), "interpolateAtOffset") ||
+       !strcmp(ir->callee_name(), "interpolateAtSample") ||
+       !strcmp(ir->callee_name(), "bitfieldExtract")) {
+      check_parameters = 1;
+   } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) {
+      check_parameters = 2;
+   } else if (function_always_returns_mediump_or_lowp(ir->callee_name())) {
+      /* These only lower the return value. Parameters keep their precision,
+       * which is preserved in map_builtin.
+       */
+      check_parameters = 0;
+   }
+
     foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
+      if (!check_parameters)
+         break;
+
        if (!param->as_constant() &&
            _mesa_set_search(lowerable_rvalues, param) == NULL)
           return false;
+
+      --check_parameters;
     }
  
     return true;
@@ -564,13 +625,40 @@ find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
     assert(v.stack.empty());
  }
  
+/* Returns the 16-bit counterpart of a float, int or uint type. The vector
+ * and matrix dimensions, explicit stride and row-major flag are carried over.
+ */
+static const glsl_type *
+lower_glsl_type(const glsl_type *type)
+{
+   glsl_base_type base16;
+
+   if (type->base_type == GLSL_TYPE_FLOAT) {
+      base16 = GLSL_TYPE_FLOAT16;
+   } else if (type->base_type == GLSL_TYPE_INT) {
+      base16 = GLSL_TYPE_INT16;
+   } else if (type->base_type == GLSL_TYPE_UINT) {
+      base16 = GLSL_TYPE_UINT16;
+   } else {
+      /* Any other base type is flagged as an invalid input. */
+      unreachable("invalid type");
+      return NULL;
+   }
+
+   return glsl_type::get_instance(base16,
+                                  type->vector_elements,
+                                  type->matrix_columns,
+                                  type->explicit_stride,
+                                  type->interface_row_major);
+}
+
  static ir_rvalue *
-convert_precision(glsl_base_type type, bool up, ir_rvalue *ir)
+convert_precision(bool up, ir_rvalue *ir)
  {
     unsigned new_type, op;
  
     if (up) {
-      switch (type) {
+      switch (ir->type->base_type) {
        case GLSL_TYPE_FLOAT16:
           new_type = GLSL_TYPE_FLOAT;
           op = ir_unop_f162f;
@@ -588,7 +676,7 @@ convert_precision(glsl_base_type type, bool up, ir_rvalue *ir)
           return NULL;
        }
     } else {
-      switch (type) {
+      switch (ir->type->base_type) {
        case GLSL_TYPE_FLOAT:
           new_type = GLSL_TYPE_FLOAT16;
           op = ir_unop_f2fmp;
@@ -616,22 +704,6 @@ convert_precision(glsl_base_type type, bool up, ir_rvalue *ir)
     return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
  }
  
-static glsl_base_type
-lower_type(glsl_base_type type)
-{
-   switch (type) {
-   case GLSL_TYPE_FLOAT:
-      return GLSL_TYPE_FLOAT16;
-   case GLSL_TYPE_INT:
-      return GLSL_TYPE_INT16;
-   case GLSL_TYPE_UINT:
-      return GLSL_TYPE_UINT16;
-   default:
-      unreachable("invalid type");
-      return GLSL_TYPE_ERROR;;
-   }
-}
-
  void
  lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
  {
@@ -642,15 +714,9 @@ lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
  
     if (ir->as_dereference()) {
        if (!ir->type->is_boolean())
-         *rvalue = convert_precision(ir->type->base_type, false, ir);
-   } else if (ir->type->base_type == GLSL_TYPE_FLOAT ||
-              ir->type->base_type == GLSL_TYPE_INT ||
-              ir->type->base_type == GLSL_TYPE_UINT) {
-      ir->type = glsl_type::get_instance(lower_type(ir->type->base_type),
-                                         ir->type->vector_elements,
-                                         ir->type->matrix_columns,
-                                         ir->type->explicit_stride,
-                                         ir->type->interface_row_major);
+         *rvalue = convert_precision(false, ir);
+   } else if (ir->type->is_32bit()) {
+      ir->type = lower_glsl_type(ir->type);
  
        ir_constant *const_ir = ir->as_constant();
  
@@ -768,10 +834,9 @@ find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
     /* We don’t need to add the final conversion if the final type has been
      * converted to bool
      */
-   if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL)
-      *rvalue = convert_precision((*rvalue)->type->base_type, true, *rvalue);
-
-   progress = true;
+   if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) {
+      *rvalue = convert_precision(true, *rvalue);
+   }
  }
  
  ir_visitor_status
@@ -826,8 +891,14 @@ find_precision_visitor::map_builtin(ir_function_signature *sig)
     ir_function_signature *lowered_sig =
        sig->clone(lowered_builtin_mem_ctx, clone_ht);
  
-   foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
-      param->data.precision = GLSL_PRECISION_MEDIUM;
+   /* Functions that always return mediump or lowp should keep their
+    * parameters intact, because they can be highp. NIR can lower
+    * the up-conversion for parameters if needed.
+    */
+   if (!function_always_returns_mediump_or_lowp(sig->function_name())) {
+      foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
+         param->data.precision = GLSL_PRECISION_MEDIUM;
+      }
     }
  
     lower_precision(options, &lowered_sig->body);
@@ -840,8 +911,7 @@ find_precision_visitor::map_builtin(ir_function_signature *sig)
  }
  
  find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
-   : progress(false),
-     lowerable_rvalues(_mesa_pointer_set_create(NULL)),
+   : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
       lowered_builtins(NULL),
       clone_ht(NULL),
       lowered_builtin_mem_ctx(NULL),
@@ -862,7 +932,7 @@ find_precision_visitor::~find_precision_visitor()
  
  }
  
-bool
+void
  lower_precision(const struct gl_shader_compiler_options *options,
                  exec_list *instructions)
  {
@@ -871,6 +941,4 @@ lower_precision(const struct gl_shader_compiler_options *options,
     find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);
  
     visit_list_elements(&v, instructions);
-
-   return v.progress;
  }