glsl/lower_precision: Lower builtins depending on arguments
authorNeil Roberts <nroberts@igalia.com>
Mon, 18 Nov 2019 16:05:54 +0000 (17:05 +0100)
committerMarge Bot <eric+marge@anholt.net>
Tue, 24 Mar 2020 23:21:21 +0000 (23:21 +0000)
When an ir_call is encountered that invokes a builtin, it will now try
to generate a lowered version of the builtin. This only happens if all
of the arguments to the function are lowerable. Previously the builtin
would be inlined before the lowering pass is invoked and then the
implementation would be lowered as a consequence of the pass. However
this causes problems if the builtin has multiple arguments and the
implementation has operations on only a few of the arguments before
combining it with the others. In that case the entire builtin should
only be lowered if all of the arguments are lower precision. The
previous approach would end up lowering only parts of the
implementation.

The lowered implementations are cached in a hash table in case they can
be reused.

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3885>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3885>

src/compiler/glsl/lower_precision.cpp

index 2a944f73c96f47e93d77bfd357ca007a6c34913b..4e4ef8ef2e58b309bff6dddc6339b820d4e72f66 100644 (file)
@@ -33,6 +33,7 @@
 #include "ir_rvalue_visitor.h"
 #include "util/half_float.h"
 #include "util/set.h"
+#include "util/hash_table.h"
 #include <vector>
 
 namespace {
@@ -43,6 +44,9 @@ public:
    ~find_precision_visitor();
 
    virtual void handle_rvalue(ir_rvalue **rvalue);
+   virtual ir_visitor_status visit_enter(ir_call *ir);
+
+   ir_function_signature *map_builtin(ir_function_signature *sig);
 
    bool progress;
 
@@ -51,6 +55,18 @@ public:
     * will be added to this set.
     */
    struct set *lowerable_rvalues;
+
+   /**
+    * A mapping of builtin signature functions to lowered versions. This is
+    * filled in lazily when a lowered version is needed.
+    */
+   struct hash_table *lowered_builtins;
+   /**
+    * A temporary hash table only used in order to clone functions.
+    */
+   struct hash_table *clone_ht;
+
+   void *lowered_builtin_mem_ctx;
 };
 
 class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
@@ -378,6 +394,24 @@ find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
    return visit_continue;
 }
 
+static bool
+is_lowerable_builtin(ir_call *ir,
+                     const struct set *lowerable_rvalues)
+{
+   if (!ir->callee->is_builtin())
+      return false;
+
+   assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
+
+   foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
+      if (!param->as_constant() &&
+          _mesa_set_search(lowerable_rvalues, param) == NULL)
+         return false;
+   }
+
+   return true;
+}
+
 ir_visitor_status
 find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
 {
@@ -397,8 +431,16 @@ find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
 
    assert(var->data.mode == ir_var_temporary);
 
+   unsigned return_precision = ir->callee->return_precision;
+
+   /* If the call is to a builtin, then the function won’t have a return
+    * precision and we should determine it from the precision of the arguments.
+    */
+   if (is_lowerable_builtin(ir, lowerable_rvalues))
+      return_precision = GLSL_PRECISION_MEDIUM;
+
    can_lower_state lower_state =
-      handle_precision(var->type, ir->callee->return_precision);
+      handle_precision(var->type, return_precision);
 
    if (lower_state == SHOULD_LOWER) {
       /* There probably shouldn’t be any situations where multiple ir_call
@@ -592,15 +634,76 @@ find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
    progress = true;
 }
 
+ir_visitor_status
+find_precision_visitor::visit_enter(ir_call *ir)
+{
+   ir_rvalue_enter_visitor::visit_enter(ir);
+
+   /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
+    * overrode the precision of the temporary return variable, then we can
+    * replace the builtin implementation with a lowered version.
+    */
+
+   if (!ir->callee->is_builtin() ||
+       ir->return_deref == NULL ||
+       ir->return_deref->variable_referenced()->data.precision !=
+       GLSL_PRECISION_MEDIUM)
+      return visit_continue;
+
+   ir->callee = map_builtin(ir->callee);
+   ir->generate_inline(ir);
+   ir->remove();
+
+   return visit_continue_with_parent;
+}
+
+ir_function_signature *
+find_precision_visitor::map_builtin(ir_function_signature *sig)
+{
+   if (lowered_builtins == NULL) {
+      lowered_builtins = _mesa_pointer_hash_table_create(NULL);
+      clone_ht =_mesa_pointer_hash_table_create(NULL);
+      lowered_builtin_mem_ctx = ralloc_context(NULL);
+   } else {
+      struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
+      if (entry)
+         return (ir_function_signature *) entry->data;
+   }
+
+   ir_function_signature *lowered_sig =
+      sig->clone(lowered_builtin_mem_ctx, clone_ht);
+
+   foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
+      param->data.precision = GLSL_PRECISION_MEDIUM;
+   }
+
+   lower_precision(&lowered_sig->body);
+
+   _mesa_hash_table_clear(clone_ht, NULL);
+
+   _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);
+
+   return lowered_sig;
+}
+
 find_precision_visitor::find_precision_visitor()
    : progress(false),
-     lowerable_rvalues(_mesa_pointer_set_create(NULL))
+     lowerable_rvalues(_mesa_pointer_set_create(NULL)),
+     lowered_builtins(NULL),
+     clone_ht(NULL),
+     lowered_builtin_mem_ctx(NULL)
 {
 }
 
 find_precision_visitor::~find_precision_visitor()
 {
    _mesa_set_destroy(lowerable_rvalues, NULL);
+
+   if (lowered_builtins) {
+      _mesa_hash_table_destroy(lowered_builtins, NULL);
+      _mesa_hash_table_destroy(clone_ht, NULL);
+      ralloc_free(lowered_builtin_mem_ctx);
+   }
 }
 
 }