2 * Copyright © 2019 Google, Inc
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
25 * \file lower_precision.cpp
28 #include "main/macros.h"
29 #include "compiler/glsl_types.h"
31 #include "ir_builder.h"
32 #include "ir_optimization.h"
33 #include "ir_rvalue_visitor.h"
34 #include "util/half_float.h"
36 #include "util/hash_table.h"
41 class find_precision_visitor
: public ir_rvalue_enter_visitor
{
43 find_precision_visitor();
44 ~find_precision_visitor();
46 virtual void handle_rvalue(ir_rvalue
**rvalue
);
47 virtual ir_visitor_status
visit_enter(ir_call
*ir
);
49 ir_function_signature
*map_builtin(ir_function_signature
*sig
);
53 /* Set of rvalues that can be lowered. This will be filled in by
54 * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
55 * will be added to this set.
57 struct set
*lowerable_rvalues
;
60 * A mapping of builtin signature functions to lowered versions. This is
61 * filled in lazily when a lowered version is needed.
63 struct hash_table
*lowered_builtins
;
65 * A temporary hash table only used in order to clone functions.
67 struct hash_table
*clone_ht
;
69 void *lowered_builtin_mem_ctx
;
72 class find_lowerable_rvalues_visitor
: public ir_hierarchical_visitor
{
74 enum can_lower_state
{
80 enum parent_relation
{
81 /* The parent performs a further operation involving the result from the
82 * child and can be lowered along with it.
85 /* The parent instruction’s operation is independent of the child type so
86 * the child should be lowered separately.
88 INDEPENDENT_OPERATION
,
92 ir_instruction
*instr
;
93 enum can_lower_state state
;
94 /* List of child rvalues that can be lowered. When this stack entry is
95 * popped, if this node itself can’t be lowered then all of the children
96 * are root nodes to lower so we will add them to lowerable_rvalues.
97 * Otherwise if this node can also be lowered then we won’t add the
98 * children because we only want to add the topmost lowerable nodes to
99 * lowerable_rvalues and the children will be lowered as part of lowering
100 * the parent.
102 std::vector
<ir_instruction
*> lowerable_children
;
105 find_lowerable_rvalues_visitor(struct set
*result
);
107 static void stack_enter(class ir_instruction
*ir
, void *data
);
108 static void stack_leave(class ir_instruction
*ir
, void *data
);
110 virtual ir_visitor_status
visit(ir_constant
*ir
);
111 virtual ir_visitor_status
visit(ir_dereference_variable
*ir
);
113 virtual ir_visitor_status
visit_enter(ir_dereference_record
*ir
);
114 virtual ir_visitor_status
visit_enter(ir_dereference_array
*ir
);
115 virtual ir_visitor_status
visit_enter(ir_texture
*ir
);
116 virtual ir_visitor_status
visit_enter(ir_expression
*ir
);
118 virtual ir_visitor_status
visit_leave(ir_assignment
*ir
);
119 virtual ir_visitor_status
visit_leave(ir_call
*ir
);
121 static can_lower_state
handle_precision(const glsl_type
*type
,
124 static parent_relation
get_parent_relation(ir_instruction
*parent
,
125 ir_instruction
*child
);
127 std::vector
<stack_entry
> stack
;
128 struct set
*lowerable_rvalues
;
130 void pop_stack_entry();
131 void add_lowerable_children(const stack_entry
&entry
);
134 class lower_precision_visitor
: public ir_rvalue_visitor
{
136 virtual void handle_rvalue(ir_rvalue
**rvalue
);
137 virtual ir_visitor_status
visit_enter(ir_dereference_array
*);
138 virtual ir_visitor_status
visit_enter(ir_dereference_record
*);
139 virtual ir_visitor_status
visit_enter(ir_call
*ir
);
140 virtual ir_visitor_status
visit_enter(ir_texture
*ir
);
141 virtual ir_visitor_status
visit_leave(ir_expression
*);
145 can_lower_type(const glsl_type
*type
)
147 /* Don’t lower any expressions involving non-float types except bool and
148 * texture samplers. This will rule out operations that change the type such
149 * as conversion to ints. It will end up lowering the arguments
150 * instead and adding a final conversion to float32. We want to handle
151 * boolean types so that it will do comparisons as 16-bit.
154 switch (type
->base_type
) {
155 case GLSL_TYPE_FLOAT
:
157 case GLSL_TYPE_SAMPLER
:
165 find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set
*res
)
167 lowerable_rvalues
= res
;
168 callback_enter
= stack_enter
;
169 callback_leave
= stack_leave
;
175 find_lowerable_rvalues_visitor::stack_enter(class ir_instruction
*ir
,
178 find_lowerable_rvalues_visitor
*state
=
179 (find_lowerable_rvalues_visitor
*) data
;
181 /* Add a new stack entry for this instruction */
185 entry
.state
= state
->in_assignee
? CANT_LOWER
: UNKNOWN
;
187 state
->stack
.push_back(entry
);
191 find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry
&entry
)
193 /* We can’t lower this node so if there were any pending children then they
194 * are all root lowerable nodes and we should add them to the set.
196 for (auto &it
: entry
.lowerable_children
)
197 _mesa_set_add(lowerable_rvalues
, it
);
201 find_lowerable_rvalues_visitor::pop_stack_entry()
203 const stack_entry
&entry
= stack
.back();
205 if (stack
.size() >= 2) {
206 /* Combine this state into the parent state, unless the parent operation
207 * doesn’t have any relation to the child operations
209 stack_entry
&parent
= stack
.end()[-2];
210 parent_relation rel
= get_parent_relation(parent
.instr
, entry
.instr
);
212 if (rel
== COMBINED_OPERATION
) {
213 switch (entry
.state
) {
215 parent
.state
= CANT_LOWER
;
218 if (parent
.state
== UNKNOWN
)
219 parent
.state
= SHOULD_LOWER
;
227 if (entry
.state
== SHOULD_LOWER
) {
228 ir_rvalue
*rv
= entry
.instr
->as_rvalue();
231 add_lowerable_children(entry
);
232 } else if (stack
.size() >= 2) {
233 stack_entry
&parent
= stack
.end()[-2];
235 switch (get_parent_relation(parent
.instr
, rv
)) {
236 case COMBINED_OPERATION
:
237 /* We only want to add the toplevel lowerable instructions to the
238 * lowerable set. Therefore if there is a parent then instead of
239 * adding this instruction to the set we will queue it depending on
240 * the result of the parent instruction.
242 parent
.lowerable_children
.push_back(entry
.instr
);
244 case INDEPENDENT_OPERATION
:
245 _mesa_set_add(lowerable_rvalues
, rv
);
249 /* This is a toplevel node so add it directly to the lowerable
250 * set.
252 _mesa_set_add(lowerable_rvalues
, rv
);
254 } else if (entry
.state
== CANT_LOWER
) {
255 add_lowerable_children(entry
);
262 find_lowerable_rvalues_visitor::stack_leave(class ir_instruction
*ir
,
265 find_lowerable_rvalues_visitor
*state
=
266 (find_lowerable_rvalues_visitor
*) data
;
268 state
->pop_stack_entry();
271 enum find_lowerable_rvalues_visitor::can_lower_state
272 find_lowerable_rvalues_visitor::handle_precision(const glsl_type
*type
,
275 if (!can_lower_type(type
))
279 case GLSL_PRECISION_NONE
:
281 case GLSL_PRECISION_HIGH
:
283 case GLSL_PRECISION_MEDIUM
:
284 case GLSL_PRECISION_LOW
:
291 enum find_lowerable_rvalues_visitor::parent_relation
292 find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction
*parent
,
293 ir_instruction
*child
)
295 /* If the parent is a dereference instruction then the only child could be
296 * for example an array dereference and that should be lowered independently
297 * of the parent.
299 if (parent
->as_dereference())
300 return INDEPENDENT_OPERATION
;
302 /* The precision of texture sampling depends on the precision of the sampler.
303 * The rest of the arguments don’t matter so we can treat it as an
304 * independent operation.
306 if (parent
->as_texture())
307 return INDEPENDENT_OPERATION
;
309 return COMBINED_OPERATION
;
313 find_lowerable_rvalues_visitor::visit(ir_constant
*ir
)
315 stack_enter(ir
, this);
317 if (!can_lower_type(ir
->type
))
318 stack
.back().state
= CANT_LOWER
;
320 stack_leave(ir
, this);
322 return visit_continue
;
326 find_lowerable_rvalues_visitor::visit(ir_dereference_variable
*ir
)
328 stack_enter(ir
, this);
330 if (stack
.back().state
== UNKNOWN
)
331 stack
.back().state
= handle_precision(ir
->type
, ir
->precision());
333 stack_leave(ir
, this);
335 return visit_continue
;
339 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record
*ir
)
341 ir_hierarchical_visitor::visit_enter(ir
);
343 if (stack
.back().state
== UNKNOWN
)
344 stack
.back().state
= handle_precision(ir
->type
, ir
->precision());
346 return visit_continue
;
350 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array
*ir
)
352 ir_hierarchical_visitor::visit_enter(ir
);
354 if (stack
.back().state
== UNKNOWN
)
355 stack
.back().state
= handle_precision(ir
->type
, ir
->precision());
357 return visit_continue
;
361 find_lowerable_rvalues_visitor::visit_enter(ir_texture
*ir
)
363 ir_hierarchical_visitor::visit_enter(ir
);
365 if (stack
.back().state
== UNKNOWN
) {
366 /* The precision of the sample value depends on the precision of the
367 * sampler.
369 stack
.back().state
= handle_precision(ir
->type
,
370 ir
->sampler
->precision());
373 return visit_continue
;
377 find_lowerable_rvalues_visitor::visit_enter(ir_expression
*ir
)
379 ir_hierarchical_visitor::visit_enter(ir
);
381 if (!can_lower_type(ir
->type
))
382 stack
.back().state
= CANT_LOWER
;
384 /* Don't lower precision for derivative calculations */
385 if (ir
->operation
== ir_unop_dFdx
||
386 ir
->operation
== ir_unop_dFdx_coarse
||
387 ir
->operation
== ir_unop_dFdx_fine
||
388 ir
->operation
== ir_unop_dFdy
||
389 ir
->operation
== ir_unop_dFdy_coarse
||
390 ir
->operation
== ir_unop_dFdy_fine
) {
391 stack
.back().state
= CANT_LOWER
;
394 return visit_continue
;
398 is_lowerable_builtin(ir_call
*ir
,
399 const struct set
*lowerable_rvalues
)
401 if (!ir
->callee
->is_builtin())
404 assert(ir
->callee
->return_precision
== GLSL_PRECISION_NONE
);
406 foreach_in_list(ir_rvalue
, param
, &ir
->actual_parameters
) {
407 if (!param
->as_constant() &&
408 _mesa_set_search(lowerable_rvalues
, param
) == NULL
)
416 find_lowerable_rvalues_visitor::visit_leave(ir_call
*ir
)
418 ir_hierarchical_visitor::visit_leave(ir
);
420 /* Special case for handling temporary variables generated by the compiler
421 * for function calls. If we assign to one of these using a function call
422 * that has a lowerable return type then we can assume the temporary
423 * variable should have a medium precision too.
426 /* Do nothing if the return type is void. */
427 if (!ir
->return_deref
)
428 return visit_continue
;
430 ir_variable
*var
= ir
->return_deref
->variable_referenced();
432 assert(var
->data
.mode
== ir_var_temporary
);
434 unsigned return_precision
= ir
->callee
->return_precision
;
436 /* If the call is to a builtin, then the function won’t have a return
437 * precision and we should determine it from the precision of the arguments.
439 if (is_lowerable_builtin(ir
, lowerable_rvalues
))
440 return_precision
= GLSL_PRECISION_MEDIUM
;
442 can_lower_state lower_state
=
443 handle_precision(var
->type
, return_precision
);
445 if (lower_state
== SHOULD_LOWER
) {
446 /* There probably shouldn’t be any situations where multiple ir_call
447 * instructions write to the same temporary?
449 assert(var
->data
.precision
== GLSL_PRECISION_NONE
);
450 var
->data
.precision
= GLSL_PRECISION_MEDIUM
;
452 var
->data
.precision
= GLSL_PRECISION_HIGH
;
455 return visit_continue
;
459 find_lowerable_rvalues_visitor::visit_leave(ir_assignment
*ir
)
461 ir_hierarchical_visitor::visit_leave(ir
);
463 /* Special case for handling temporary variables generated by the compiler.
464 * If we assign to one of these using a lowered precision then we can assume
465 * the temporary variable should have a medium precision too.
467 ir_variable
*var
= ir
->lhs
->variable_referenced();
469 if (var
->data
.mode
== ir_var_temporary
) {
470 if (_mesa_set_search(lowerable_rvalues
, ir
->rhs
)) {
471 /* Only override the precision if this is the first assignment. For
472 * temporaries such as the ones generated for the ?: operator there
473 * can be multiple assignments with different precisions. This way we
474 * get the highest precision of all of the assignments.
476 if (var
->data
.precision
== GLSL_PRECISION_NONE
)
477 var
->data
.precision
= GLSL_PRECISION_MEDIUM
;
478 } else if (!ir
->rhs
->as_constant()) {
479 var
->data
.precision
= GLSL_PRECISION_HIGH
;
483 return visit_continue
;
487 find_lowerable_rvalues(exec_list
*instructions
,
490 find_lowerable_rvalues_visitor
v(result
);
492 visit_list_elements(&v
, instructions
);
494 assert(v
.stack
.empty());
498 convert_precision(int op
, ir_rvalue
*ir
)
500 unsigned base_type
= (op
== ir_unop_f2fmp
?
501 GLSL_TYPE_FLOAT16
: GLSL_TYPE_FLOAT
);
502 const glsl_type
*desired_type
;
503 desired_type
= glsl_type::get_instance(base_type
,
504 ir
->type
->vector_elements
,
505 ir
->type
->matrix_columns
);
507 void *mem_ctx
= ralloc_parent(ir
);
508 return new(mem_ctx
) ir_expression(op
, desired_type
, ir
, NULL
);
512 lower_precision_visitor::handle_rvalue(ir_rvalue
**rvalue
)
514 ir_rvalue
*ir
= *rvalue
;
519 if (ir
->as_dereference()) {
520 if (!ir
->type
->is_boolean())
521 *rvalue
= convert_precision(ir_unop_f2fmp
, ir
);
522 } else if (ir
->type
->is_float()) {
523 ir
->type
= glsl_type::get_instance(GLSL_TYPE_FLOAT16
,
524 ir
->type
->vector_elements
,
525 ir
->type
->matrix_columns
,
526 ir
->type
->explicit_stride
,
527 ir
->type
->interface_row_major
);
529 ir_constant
*const_ir
= ir
->as_constant();
532 ir_constant_data value
;
534 for (unsigned i
= 0; i
< ARRAY_SIZE(value
.f16
); i
++)
535 value
.f16
[i
] = _mesa_float_to_half(const_ir
->value
.f
[i
]);
537 const_ir
->value
= value
;
543 lower_precision_visitor::visit_enter(ir_dereference_record
*ir
)
545 /* We don’t want to lower the variable */
546 return visit_continue_with_parent
;
550 lower_precision_visitor::visit_enter(ir_dereference_array
*ir
)
552 /* We don’t want to convert the array index or the variable. If the array
553 * index itself is lowerable that will be handled separately.
555 return visit_continue_with_parent
;
559 lower_precision_visitor::visit_enter(ir_call
*ir
)
561 /* We don’t want to convert the arguments. These will be handled separately.
563 return visit_continue_with_parent
;
567 lower_precision_visitor::visit_enter(ir_texture
*ir
)
569 /* We don’t want to convert the arguments. These will be handled separately.
571 return visit_continue_with_parent
;
575 lower_precision_visitor::visit_leave(ir_expression
*ir
)
577 ir_rvalue_visitor::visit_leave(ir
);
579 /* If the expression is a conversion operation to or from bool then fix the
580 * operation.
582 switch (ir
->operation
) {
584 ir
->operation
= ir_unop_b2f16
;
587 ir
->operation
= ir_unop_f162b
;
593 return visit_continue
;
597 find_precision_visitor::handle_rvalue(ir_rvalue
**rvalue
)
599 /* Whether the precision of an rvalue can be lowered is determined first, by
600 * find_lowerable_rvalues_visitor.
601 * Once an rvalue is found to be lowerable, we can add the f2fmp
602 * conversion through lower_precision_visitor.
607 struct set_entry
*entry
= _mesa_set_search(lowerable_rvalues
, *rvalue
);
612 _mesa_set_remove(lowerable_rvalues
, entry
);
614 /* If the entire expression is just a variable dereference then trying to
615 * lower it will just directly add pointless to and from conversions without
616 * any actual operation in-between. Although these will eventually get
617 * optimised out, avoiding generating them here also avoids breaking inout
618 * parameters to functions.
620 if ((*rvalue
)->as_dereference())
623 lower_precision_visitor v
;
625 (*rvalue
)->accept(&v
);
626 v
.handle_rvalue(rvalue
);
628 /* We don’t need to add the final conversion if the final type has been
629 * converted to bool
631 if ((*rvalue
)->type
->base_type
!= GLSL_TYPE_BOOL
)
632 *rvalue
= convert_precision(ir_unop_f162f
, *rvalue
);
638 find_precision_visitor::visit_enter(ir_call
*ir
)
640 ir_rvalue_enter_visitor::visit_enter(ir
);
642 /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
643 * overrode the precision of the temporary return variable, then we can
644 * replace the builtin implementation with a lowered version.
647 if (!ir
->callee
->is_builtin() ||
648 ir
->return_deref
== NULL
||
649 ir
->return_deref
->variable_referenced()->data
.precision
!=
650 GLSL_PRECISION_MEDIUM
)
651 return visit_continue
;
653 ir
->callee
= map_builtin(ir
->callee
);
654 ir
->generate_inline(ir
);
657 return visit_continue_with_parent
;
660 ir_function_signature
*
661 find_precision_visitor::map_builtin(ir_function_signature
*sig
)
663 if (lowered_builtins
== NULL
) {
664 lowered_builtins
= _mesa_pointer_hash_table_create(NULL
);
665 clone_ht
=_mesa_pointer_hash_table_create(NULL
);
666 lowered_builtin_mem_ctx
= ralloc_context(NULL
);
668 struct hash_entry
*entry
= _mesa_hash_table_search(lowered_builtins
, sig
);
670 return (ir_function_signature
*) entry
->data
;
673 ir_function_signature
*lowered_sig
=
674 sig
->clone(lowered_builtin_mem_ctx
, clone_ht
);
676 foreach_in_list(ir_variable
, param
, &lowered_sig
->parameters
) {
677 param
->data
.precision
= GLSL_PRECISION_MEDIUM
;
680 lower_precision(&lowered_sig
->body
);
682 _mesa_hash_table_clear(clone_ht
, NULL
);
684 _mesa_hash_table_insert(lowered_builtins
, sig
, lowered_sig
);
689 find_precision_visitor::find_precision_visitor()
691 lowerable_rvalues(_mesa_pointer_set_create(NULL
)),
692 lowered_builtins(NULL
),
694 lowered_builtin_mem_ctx(NULL
)
698 find_precision_visitor::~find_precision_visitor()
700 _mesa_set_destroy(lowerable_rvalues
, NULL
);
702 if (lowered_builtins
) {
703 _mesa_hash_table_destroy(lowered_builtins
, NULL
);
704 _mesa_hash_table_destroy(clone_ht
, NULL
);
705 ralloc_free(lowered_builtin_mem_ctx
);
712 lower_precision(exec_list
*instructions
)
714 find_precision_visitor v
;
716 find_lowerable_rvalues(instructions
, v
.lowerable_rvalues
);
718 visit_list_elements(&v
, instructions
);