/*
 * Copyright © 2019 Google, Inc
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_precision.cpp
 */
#include "main/macros.h"
#include "main/mtypes.h"
#include "compiler/glsl_types.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"
#include "util/half_float.h"
#include "util/hash_table.h"
#include <string.h>
#include <vector>
42 class find_precision_visitor
: public ir_rvalue_enter_visitor
{
44 find_precision_visitor(const struct gl_shader_compiler_options
*options
);
45 ~find_precision_visitor();
47 virtual void handle_rvalue(ir_rvalue
**rvalue
);
48 virtual ir_visitor_status
visit_enter(ir_call
*ir
);
50 ir_function_signature
*map_builtin(ir_function_signature
*sig
);
52 /* Set of rvalues that can be lowered. This will be filled in by
53 * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
54 * will be added to this set.
56 struct set
*lowerable_rvalues
;
59 * A mapping of builtin signature functions to lowered versions. This is
60 * filled in lazily when a lowered version is needed.
62 struct hash_table
*lowered_builtins
;
64 * A temporary hash table only used in order to clone functions.
66 struct hash_table
*clone_ht
;
68 void *lowered_builtin_mem_ctx
;
70 const struct gl_shader_compiler_options
*options
;
73 class find_lowerable_rvalues_visitor
: public ir_hierarchical_visitor
{
75 enum can_lower_state
{
81 enum parent_relation
{
82 /* The parent performs a further operation involving the result from the
83 * child and can be lowered along with it.
86 /* The parent instruction’s operation is independent of the child type so
87 * the child should be lowered separately.
89 INDEPENDENT_OPERATION
,
93 ir_instruction
*instr
;
94 enum can_lower_state state
;
95 /* List of child rvalues that can be lowered. When this stack entry is
96 * popped, if this node itself can’t be lowered than all of the children
97 * are root nodes to lower so we will add them to lowerable_rvalues.
98 * Otherwise if this node can also be lowered then we won’t add the
99 * children because we only want to add the topmost lowerable nodes to
100 * lowerable_rvalues and the children will be lowered as part of lowering
103 std::vector
<ir_instruction
*> lowerable_children
;
106 find_lowerable_rvalues_visitor(struct set
*result
,
107 const struct gl_shader_compiler_options
*options
);
109 static void stack_enter(class ir_instruction
*ir
, void *data
);
110 static void stack_leave(class ir_instruction
*ir
, void *data
);
112 virtual ir_visitor_status
visit(ir_constant
*ir
);
113 virtual ir_visitor_status
visit(ir_dereference_variable
*ir
);
115 virtual ir_visitor_status
visit_enter(ir_dereference_record
*ir
);
116 virtual ir_visitor_status
visit_enter(ir_dereference_array
*ir
);
117 virtual ir_visitor_status
visit_enter(ir_texture
*ir
);
118 virtual ir_visitor_status
visit_enter(ir_expression
*ir
);
120 virtual ir_visitor_status
visit_leave(ir_assignment
*ir
);
121 virtual ir_visitor_status
visit_leave(ir_call
*ir
);
123 can_lower_state
handle_precision(const glsl_type
*type
,
124 int precision
) const;
126 static parent_relation
get_parent_relation(ir_instruction
*parent
,
127 ir_instruction
*child
);
129 std::vector
<stack_entry
> stack
;
130 struct set
*lowerable_rvalues
;
131 const struct gl_shader_compiler_options
*options
;
133 void pop_stack_entry();
134 void add_lowerable_children(const stack_entry
&entry
);
137 class lower_precision_visitor
: public ir_rvalue_visitor
{
139 virtual void handle_rvalue(ir_rvalue
**rvalue
);
140 virtual ir_visitor_status
visit_enter(ir_dereference_array
*);
141 virtual ir_visitor_status
visit_enter(ir_dereference_record
*);
142 virtual ir_visitor_status
visit_enter(ir_call
*ir
);
143 virtual ir_visitor_status
visit_enter(ir_texture
*ir
);
144 virtual ir_visitor_status
visit_leave(ir_expression
*);
148 can_lower_type(const struct gl_shader_compiler_options
*options
,
149 const glsl_type
*type
)
151 /* Don’t lower any expressions involving non-float types except bool and
152 * texture samplers. This will rule out operations that change the type such
153 * as conversion to ints. Instead it will end up lowering the arguments
154 * instead and adding a final conversion to float32. We want to handle
155 * boolean types so that it will do comparisons as 16-bit.
158 switch (type
->without_array()->base_type
) {
159 /* TODO: should we do anything for these two with regard to Int16 vs FP16
163 case GLSL_TYPE_SAMPLER
:
164 case GLSL_TYPE_IMAGE
:
167 case GLSL_TYPE_FLOAT
:
168 return options
->LowerPrecisionFloat16
;
172 return options
->LowerPrecisionInt16
;
179 find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set
*res
,
180 const struct gl_shader_compiler_options
*opts
)
182 lowerable_rvalues
= res
;
184 callback_enter
= stack_enter
;
185 callback_leave
= stack_leave
;
191 find_lowerable_rvalues_visitor::stack_enter(class ir_instruction
*ir
,
194 find_lowerable_rvalues_visitor
*state
=
195 (find_lowerable_rvalues_visitor
*) data
;
197 /* Add a new stack entry for this instruction */
201 entry
.state
= state
->in_assignee
? CANT_LOWER
: UNKNOWN
;
203 state
->stack
.push_back(entry
);
207 find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry
&entry
)
209 /* We can’t lower this node so if there were any pending children then they
210 * are all root lowerable nodes and we should add them to the set.
212 for (auto &it
: entry
.lowerable_children
)
213 _mesa_set_add(lowerable_rvalues
, it
);
217 find_lowerable_rvalues_visitor::pop_stack_entry()
219 const stack_entry
&entry
= stack
.back();
221 if (stack
.size() >= 2) {
222 /* Combine this state into the parent state, unless the parent operation
223 * doesn’t have any relation to the child operations
225 stack_entry
&parent
= stack
.end()[-2];
226 parent_relation rel
= get_parent_relation(parent
.instr
, entry
.instr
);
228 if (rel
== COMBINED_OPERATION
) {
229 switch (entry
.state
) {
231 parent
.state
= CANT_LOWER
;
234 if (parent
.state
== UNKNOWN
)
235 parent
.state
= SHOULD_LOWER
;
243 if (entry
.state
== SHOULD_LOWER
) {
244 ir_rvalue
*rv
= entry
.instr
->as_rvalue();
247 add_lowerable_children(entry
);
248 } else if (stack
.size() >= 2) {
249 stack_entry
&parent
= stack
.end()[-2];
251 switch (get_parent_relation(parent
.instr
, rv
)) {
252 case COMBINED_OPERATION
:
253 /* We only want to add the toplevel lowerable instructions to the
254 * lowerable set. Therefore if there is a parent then instead of
255 * adding this instruction to the set we will queue depending on
256 * the result of the parent instruction.
258 parent
.lowerable_children
.push_back(entry
.instr
);
260 case INDEPENDENT_OPERATION
:
261 _mesa_set_add(lowerable_rvalues
, rv
);
265 /* This is a toplevel node so add it directly to the lowerable
268 _mesa_set_add(lowerable_rvalues
, rv
);
270 } else if (entry
.state
== CANT_LOWER
) {
271 add_lowerable_children(entry
);
278 find_lowerable_rvalues_visitor::stack_leave(class ir_instruction
*ir
,
281 find_lowerable_rvalues_visitor
*state
=
282 (find_lowerable_rvalues_visitor
*) data
;
284 state
->pop_stack_entry();
287 enum find_lowerable_rvalues_visitor::can_lower_state
288 find_lowerable_rvalues_visitor::handle_precision(const glsl_type
*type
,
291 if (!can_lower_type(options
, type
))
295 case GLSL_PRECISION_NONE
:
297 case GLSL_PRECISION_HIGH
:
299 case GLSL_PRECISION_MEDIUM
:
300 case GLSL_PRECISION_LOW
:
307 enum find_lowerable_rvalues_visitor::parent_relation
308 find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction
*parent
,
309 ir_instruction
*child
)
311 /* If the parent is a dereference instruction then the only child could be
312 * for example an array dereference and that should be lowered independently
315 if (parent
->as_dereference())
316 return INDEPENDENT_OPERATION
;
318 /* The precision of texture sampling depend on the precision of the sampler.
319 * The rest of the arguments don’t matter so we can treat it as an
320 * independent operation.
322 if (parent
->as_texture())
323 return INDEPENDENT_OPERATION
;
325 return COMBINED_OPERATION
;
329 find_lowerable_rvalues_visitor::visit(ir_constant
*ir
)
331 stack_enter(ir
, this);
333 if (!can_lower_type(options
, ir
->type
))
334 stack
.back().state
= CANT_LOWER
;
336 stack_leave(ir
, this);
338 return visit_continue
;
342 find_lowerable_rvalues_visitor::visit(ir_dereference_variable
*ir
)
344 stack_enter(ir
, this);
346 if (stack
.back().state
== UNKNOWN
)
347 stack
.back().state
= handle_precision(ir
->type
, ir
->precision());
349 stack_leave(ir
, this);
351 return visit_continue
;
355 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record
*ir
)
357 ir_hierarchical_visitor::visit_enter(ir
);
359 if (stack
.back().state
== UNKNOWN
)
360 stack
.back().state
= handle_precision(ir
->type
, ir
->precision());
362 return visit_continue
;
366 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array
*ir
)
368 ir_hierarchical_visitor::visit_enter(ir
);
370 if (stack
.back().state
== UNKNOWN
)
371 stack
.back().state
= handle_precision(ir
->type
, ir
->precision());
373 return visit_continue
;
377 find_lowerable_rvalues_visitor::visit_enter(ir_texture
*ir
)
379 ir_hierarchical_visitor::visit_enter(ir
);
381 /* The precision of the sample value depends on the precision of the
384 stack
.back().state
= handle_precision(ir
->type
,
385 ir
->sampler
->precision());
386 return visit_continue
;
390 find_lowerable_rvalues_visitor::visit_enter(ir_expression
*ir
)
392 ir_hierarchical_visitor::visit_enter(ir
);
394 if (!can_lower_type(options
, ir
->type
))
395 stack
.back().state
= CANT_LOWER
;
397 /* Don't lower precision for derivative calculations */
398 if (!options
->LowerPrecisionDerivatives
&&
399 (ir
->operation
== ir_unop_dFdx
||
400 ir
->operation
== ir_unop_dFdx_coarse
||
401 ir
->operation
== ir_unop_dFdx_fine
||
402 ir
->operation
== ir_unop_dFdy
||
403 ir
->operation
== ir_unop_dFdy_coarse
||
404 ir
->operation
== ir_unop_dFdy_fine
)) {
405 stack
.back().state
= CANT_LOWER
;
408 return visit_continue
;
/**
 * Return true if \p name is one of the builtins listed below; for these only
 * the return value is lowered and the parameters keep their precision.
 *
 * \param name  builtin function name; must not be NULL
 */
static bool
function_always_returns_mediump_or_lowp(const char *name)
{
   static const char *const names[] = {
      "bitCount",
      "findLSB",
      "findMSB",
      "unpackHalf2x16",
      "unpackUnorm4x8",
      "unpackSnorm4x8",
   };

   for (const char *candidate : names) {
      if (!strcmp(name, candidate))
         return true;
   }

   return false;
}
423 handle_call(ir_call
*ir
, const struct set
*lowerable_rvalues
)
425 /* The intrinsic call is inside the wrapper imageLoad function that will
426 * be inlined. We have to handle both of them.
428 if (ir
->callee
->intrinsic_id
== ir_intrinsic_image_load
||
429 (ir
->callee
->is_builtin() &&
430 !strcmp(ir
->callee_name(), "imageLoad"))) {
431 ir_rvalue
*param
= (ir_rvalue
*)ir
->actual_parameters
.get_head();
432 ir_variable
*resource
= param
->variable_referenced();
434 assert(ir
->callee
->return_precision
== GLSL_PRECISION_NONE
);
435 assert(resource
->type
->without_array()->is_image());
437 /* GLSL ES 3.20 requires that images have a precision modifier, but if
438 * you set one, it doesn't do anything, because all intrinsics are
439 * defined with highp. This seems to be a spec bug.
441 * In theory we could set the return value to mediump if the image
442 * format has a lower precision. This appears to be the most sensible
445 const struct util_format_description
*desc
=
446 util_format_description(resource
->data
.image_format
);
448 util_format_get_first_non_void_channel(resource
->data
.image_format
);
453 if (desc
->channel
[i
].pure_integer
||
454 desc
->channel
[i
].type
== UTIL_FORMAT_TYPE_FLOAT
)
455 mediump
= desc
->channel
[i
].size
<= 16;
457 mediump
= desc
->channel
[i
].size
<= 10; /* unorm/snorm */
459 return mediump
? GLSL_PRECISION_MEDIUM
: GLSL_PRECISION_HIGH
;
462 /* Return the declared precision for user-defined functions. */
463 if (!ir
->callee
->is_builtin())
464 return ir
->callee
->return_precision
;
466 /* Handle special calls. */
467 if (ir
->callee
->is_builtin() && ir
->actual_parameters
.length()) {
468 ir_rvalue
*param
= (ir_rvalue
*)ir
->actual_parameters
.get_head();
469 ir_variable
*var
= param
->variable_referenced();
471 /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
472 * be inlined by lower_precision() if we return true here, so that we can
473 * get to ir_texture later and do proper lowering.
475 * We should lower the type of the return value if the sampler type
476 * uses lower precision. The function parameters don't matter.
478 if (var
&& var
->type
->without_array()->is_sampler()) {
479 /* textureSize always returns highp. */
480 if (!strcmp(ir
->callee_name(), "textureSize"))
481 return GLSL_PRECISION_HIGH
;
483 return var
->data
.precision
;
487 if (/* Parameters are always highp: */
488 !strcmp(ir
->callee_name(), "floatBitsToInt") ||
489 !strcmp(ir
->callee_name(), "floatBitsToUint") ||
490 !strcmp(ir
->callee_name(), "intBitsToFloat") ||
491 !strcmp(ir
->callee_name(), "uintBitsToFloat") ||
492 !strcmp(ir
->callee_name(), "bitfieldReverse") ||
493 !strcmp(ir
->callee_name(), "frexp") ||
494 !strcmp(ir
->callee_name(), "ldexp") ||
495 /* Parameters and outputs are always highp: */
496 /* TODO: The operations are highp, but carry and borrow outputs are lowp. */
497 !strcmp(ir
->callee_name(), "uaddCarry") ||
498 !strcmp(ir
->callee_name(), "usubBorrow") ||
499 !strcmp(ir
->callee_name(), "imulExtended") ||
500 !strcmp(ir
->callee_name(), "umulExtended") ||
501 !strcmp(ir
->callee_name(), "unpackUnorm2x16") ||
502 !strcmp(ir
->callee_name(), "unpackSnorm2x16") ||
503 /* Outputs are highp: */
504 !strcmp(ir
->callee_name(), "packUnorm2x16") ||
505 !strcmp(ir
->callee_name(), "packSnorm2x16") ||
506 /* Parameters are mediump and outputs are highp. The parameters should
507 * be optimized in NIR, not here, e.g:
508 * - packHalf2x16 can just be a bitcast from f16vec2 to uint32
509 * - Other opcodes don't have to convert parameters to highp if the hw
510 * has f16 versions. Optimize in NIR accordingly.
512 !strcmp(ir
->callee_name(), "packHalf2x16") ||
513 !strcmp(ir
->callee_name(), "packUnorm4x8") ||
514 !strcmp(ir
->callee_name(), "packSnorm4x8") ||
515 /* Atomic functions are not lowered. */
516 strstr(ir
->callee_name(), "atomic") == ir
->callee_name())
517 return GLSL_PRECISION_HIGH
;
519 assert(ir
->callee
->return_precision
== GLSL_PRECISION_NONE
);
521 /* Number of parameters to check if they are lowerable. */
522 unsigned check_parameters
= ir
->actual_parameters
.length();
524 /* Interpolation functions only consider the precision of the interpolant. */
525 /* Bitfield functions ignore the precision of "offset" and "bits". */
526 if (!strcmp(ir
->callee_name(), "interpolateAtOffset") ||
527 !strcmp(ir
->callee_name(), "interpolateAtSample") ||
528 !strcmp(ir
->callee_name(), "bitfieldExtract")) {
529 check_parameters
= 1;
530 } else if (!strcmp(ir
->callee_name(), "bitfieldInsert")) {
531 check_parameters
= 2;
532 } if (function_always_returns_mediump_or_lowp(ir
->callee_name())) {
533 /* These only lower the return value. Parameters keep their precision,
534 * which is preserved in map_builtin.
536 check_parameters
= 0;
539 /* If the call is to a builtin, then the function won’t have a return
540 * precision and we should determine it from the precision of the arguments.
542 foreach_in_list(ir_rvalue
, param
, &ir
->actual_parameters
) {
543 if (!check_parameters
)
546 if (!param
->as_constant() &&
547 _mesa_set_search(lowerable_rvalues
, param
) == NULL
)
548 return GLSL_PRECISION_HIGH
;
553 return GLSL_PRECISION_MEDIUM
;
557 find_lowerable_rvalues_visitor::visit_leave(ir_call
*ir
)
559 ir_hierarchical_visitor::visit_leave(ir
);
561 /* Special case for handling temporary variables generated by the compiler
562 * for function calls. If we assign to one of these using a function call
563 * that has a lowerable return type then we can assume the temporary
564 * variable should have a medium precision too.
567 /* Do nothing if the return type is void. */
568 if (!ir
->return_deref
)
569 return visit_continue
;
571 ir_variable
*var
= ir
->return_deref
->variable_referenced();
573 assert(var
->data
.mode
== ir_var_temporary
);
575 unsigned return_precision
= handle_call(ir
, lowerable_rvalues
);
577 can_lower_state lower_state
=
578 handle_precision(var
->type
, return_precision
);
580 if (lower_state
== SHOULD_LOWER
) {
581 /* There probably shouldn’t be any situations where multiple ir_call
582 * instructions write to the same temporary?
584 assert(var
->data
.precision
== GLSL_PRECISION_NONE
);
585 var
->data
.precision
= GLSL_PRECISION_MEDIUM
;
587 var
->data
.precision
= GLSL_PRECISION_HIGH
;
590 return visit_continue
;
594 find_lowerable_rvalues_visitor::visit_leave(ir_assignment
*ir
)
596 ir_hierarchical_visitor::visit_leave(ir
);
598 /* Special case for handling temporary variables generated by the compiler.
599 * If we assign to one of these using a lowered precision then we can assume
600 * the temporary variable should have a medium precision too.
602 ir_variable
*var
= ir
->lhs
->variable_referenced();
604 if (var
->data
.mode
== ir_var_temporary
) {
605 if (_mesa_set_search(lowerable_rvalues
, ir
->rhs
)) {
606 /* Only override the precision if this is the first assignment. For
607 * temporaries such as the ones generated for the ?: operator there
608 * can be multiple assignments with different precisions. This way we
609 * get the highest precision of all of the assignments.
611 if (var
->data
.precision
== GLSL_PRECISION_NONE
)
612 var
->data
.precision
= GLSL_PRECISION_MEDIUM
;
613 } else if (!ir
->rhs
->as_constant()) {
614 var
->data
.precision
= GLSL_PRECISION_HIGH
;
618 return visit_continue
;
622 find_lowerable_rvalues(const struct gl_shader_compiler_options
*options
,
623 exec_list
*instructions
,
626 find_lowerable_rvalues_visitor
v(result
, options
);
628 visit_list_elements(&v
, instructions
);
630 assert(v
.stack
.empty());
633 static const glsl_type
*
634 convert_type(bool up
, const glsl_type
*type
)
636 if (type
->is_array()) {
637 return glsl_type::get_array_instance(convert_type(up
, type
->fields
.array
),
639 type
->explicit_stride
);
642 glsl_base_type new_base_type
;
645 switch (type
->base_type
) {
646 case GLSL_TYPE_FLOAT16
:
647 new_base_type
= GLSL_TYPE_FLOAT
;
649 case GLSL_TYPE_INT16
:
650 new_base_type
= GLSL_TYPE_INT
;
652 case GLSL_TYPE_UINT16
:
653 new_base_type
= GLSL_TYPE_UINT
;
656 unreachable("invalid type");
660 switch (type
->base_type
) {
661 case GLSL_TYPE_FLOAT
:
662 new_base_type
= GLSL_TYPE_FLOAT16
;
665 new_base_type
= GLSL_TYPE_INT16
;
668 new_base_type
= GLSL_TYPE_UINT16
;
671 unreachable("invalid type");
676 return glsl_type::get_instance(new_base_type
,
677 type
->vector_elements
,
678 type
->matrix_columns
,
679 type
->explicit_stride
,
680 type
->interface_row_major
);
683 static const glsl_type
*
684 lower_glsl_type(const glsl_type
*type
)
686 return convert_type(false, type
);
690 convert_precision(bool up
, ir_rvalue
*ir
)
695 switch (ir
->type
->base_type
) {
696 case GLSL_TYPE_FLOAT16
:
699 case GLSL_TYPE_INT16
:
702 case GLSL_TYPE_UINT16
:
706 unreachable("invalid type");
710 switch (ir
->type
->base_type
) {
711 case GLSL_TYPE_FLOAT
:
721 unreachable("invalid type");
726 const glsl_type
*desired_type
= convert_type(up
, ir
->type
);
727 void *mem_ctx
= ralloc_parent(ir
);
728 return new(mem_ctx
) ir_expression(op
, desired_type
, ir
, NULL
);
732 lower_precision_visitor::handle_rvalue(ir_rvalue
**rvalue
)
734 ir_rvalue
*ir
= *rvalue
;
739 if (ir
->as_dereference()) {
740 if (!ir
->type
->is_boolean())
741 *rvalue
= convert_precision(false, ir
);
742 } else if (ir
->type
->is_32bit()) {
743 ir
->type
= lower_glsl_type(ir
->type
);
745 ir_constant
*const_ir
= ir
->as_constant();
748 ir_constant_data value
;
750 if (ir
->type
->base_type
== GLSL_TYPE_FLOAT16
) {
751 for (unsigned i
= 0; i
< ARRAY_SIZE(value
.f16
); i
++)
752 value
.f16
[i
] = _mesa_float_to_half(const_ir
->value
.f
[i
]);
753 } else if (ir
->type
->base_type
== GLSL_TYPE_INT16
) {
754 for (unsigned i
= 0; i
< ARRAY_SIZE(value
.i16
); i
++)
755 value
.i16
[i
] = const_ir
->value
.i
[i
];
756 } else if (ir
->type
->base_type
== GLSL_TYPE_UINT16
) {
757 for (unsigned i
= 0; i
< ARRAY_SIZE(value
.u16
); i
++)
758 value
.u16
[i
] = const_ir
->value
.u
[i
];
760 unreachable("invalid type");
763 const_ir
->value
= value
;
769 lower_precision_visitor::visit_enter(ir_dereference_record
*ir
)
771 /* We don’t want to lower the variable */
772 return visit_continue_with_parent
;
776 lower_precision_visitor::visit_enter(ir_dereference_array
*ir
)
778 /* We don’t want to convert the array index or the variable. If the array
779 * index itself is lowerable that will be handled separately.
781 return visit_continue_with_parent
;
785 lower_precision_visitor::visit_enter(ir_call
*ir
)
787 /* We don’t want to convert the arguments. These will be handled separately.
789 return visit_continue_with_parent
;
793 lower_precision_visitor::visit_enter(ir_texture
*ir
)
795 /* We don’t want to convert the arguments. These will be handled separately.
797 return visit_continue_with_parent
;
801 lower_precision_visitor::visit_leave(ir_expression
*ir
)
803 ir_rvalue_visitor::visit_leave(ir
);
805 /* If the expression is a conversion operation to or from bool then fix the
808 switch (ir
->operation
) {
810 ir
->operation
= ir_unop_b2f16
;
813 ir
->operation
= ir_unop_f162b
;
817 /* Nothing to do - they both support int16. */
823 return visit_continue
;
827 find_precision_visitor::handle_rvalue(ir_rvalue
**rvalue
)
829 /* Checking the precision of rvalue can be lowered first throughout
830 * find_lowerable_rvalues_visitor.
831 * Once it found the precision of rvalue can be lowered, then we can
832 * add conversion f2fmp, etc. through lower_precision_visitor.
837 struct set_entry
*entry
= _mesa_set_search(lowerable_rvalues
, *rvalue
);
842 _mesa_set_remove(lowerable_rvalues
, entry
);
844 /* If the entire expression is just a variable dereference then trying to
845 * lower it will just directly add pointless to and from conversions without
846 * any actual operation in-between. Although these will eventually get
847 * optimised out, avoiding generating them here also avoids breaking inout
848 * parameters to functions.
850 if ((*rvalue
)->as_dereference())
853 lower_precision_visitor v
;
855 (*rvalue
)->accept(&v
);
856 v
.handle_rvalue(rvalue
);
858 /* We don’t need to add the final conversion if the final type has been
861 if ((*rvalue
)->type
->base_type
!= GLSL_TYPE_BOOL
) {
862 *rvalue
= convert_precision(true, *rvalue
);
867 find_precision_visitor::visit_enter(ir_call
*ir
)
869 ir_rvalue_enter_visitor::visit_enter(ir
);
871 ir_variable
*return_var
=
872 ir
->return_deref
? ir
->return_deref
->variable_referenced() : NULL
;
874 /* Don't do anything for image_load here. We have only changed the return
875 * value to mediump/lowp, so that following instructions can use reduced
878 * The return value type of the intrinsic itself isn't changed here, but
879 * can be changed in NIR if all users use the *2*mp opcode.
881 if (ir
->callee
->intrinsic_id
== ir_intrinsic_image_load
)
882 return visit_continue
;
884 /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
885 * overrode the precision of the temporary return variable, then we can
886 * replace the builtin implementation with a lowered version.
889 if (!ir
->callee
->is_builtin() ||
890 ir
->callee
->is_intrinsic() ||
891 return_var
== NULL
||
892 (return_var
->data
.precision
!= GLSL_PRECISION_MEDIUM
&&
893 return_var
->data
.precision
!= GLSL_PRECISION_LOW
))
894 return visit_continue
;
896 ir
->callee
= map_builtin(ir
->callee
);
897 ir
->generate_inline(ir
);
900 return visit_continue_with_parent
;
903 ir_function_signature
*
904 find_precision_visitor::map_builtin(ir_function_signature
*sig
)
906 if (lowered_builtins
== NULL
) {
907 lowered_builtins
= _mesa_pointer_hash_table_create(NULL
);
908 clone_ht
=_mesa_pointer_hash_table_create(NULL
);
909 lowered_builtin_mem_ctx
= ralloc_context(NULL
);
911 struct hash_entry
*entry
= _mesa_hash_table_search(lowered_builtins
, sig
);
913 return (ir_function_signature
*) entry
->data
;
916 ir_function_signature
*lowered_sig
=
917 sig
->clone(lowered_builtin_mem_ctx
, clone_ht
);
919 /* Functions that always return mediump or lowp should keep their
920 * parameters intact, because they can be highp. NIR can lower
921 * the up-conversion for parameters if needed.
923 if (!function_always_returns_mediump_or_lowp(sig
->function_name())) {
924 foreach_in_list(ir_variable
, param
, &lowered_sig
->parameters
) {
925 param
->data
.precision
= GLSL_PRECISION_MEDIUM
;
929 lower_precision(options
, &lowered_sig
->body
);
931 _mesa_hash_table_clear(clone_ht
, NULL
);
933 _mesa_hash_table_insert(lowered_builtins
, sig
, lowered_sig
);
938 find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options
*options
)
939 : lowerable_rvalues(_mesa_pointer_set_create(NULL
)),
940 lowered_builtins(NULL
),
942 lowered_builtin_mem_ctx(NULL
),
947 find_precision_visitor::~find_precision_visitor()
949 _mesa_set_destroy(lowerable_rvalues
, NULL
);
951 if (lowered_builtins
) {
952 _mesa_hash_table_destroy(lowered_builtins
, NULL
);
953 _mesa_hash_table_destroy(clone_ht
, NULL
);
954 ralloc_free(lowered_builtin_mem_ctx
);
958 /* Lowering opcodes to 16 bits is not enough for programs with control flow
959 * (and the ?: operator, which is represented by if-then-else in the IR),
960 * because temporary variables, which are used for passing values between
961 * code blocks, are not lowered, resulting in 32-bit phis in NIR.
963 * First change the variable types to 16 bits, then change all ir_dereference
966 class lower_variables_visitor
: public ir_rvalue_enter_visitor
{
968 lower_variables_visitor(const struct gl_shader_compiler_options
*options
)
970 lower_vars
= _mesa_pointer_set_create(NULL
);
973 virtual ~lower_variables_visitor()
975 _mesa_set_destroy(lower_vars
, NULL
);
978 virtual ir_visitor_status
visit(ir_variable
*var
);
979 virtual ir_visitor_status
visit_enter(ir_assignment
*ir
);
980 virtual ir_visitor_status
visit_enter(ir_return
*ir
);
981 virtual ir_visitor_status
visit_enter(ir_call
*ir
);
982 virtual void handle_rvalue(ir_rvalue
**rvalue
);
984 void fix_types_in_deref_chain(ir_dereference
*ir
);
985 void convert_split_assignment(ir_dereference
*lhs
, ir_rvalue
*rhs
,
988 const struct gl_shader_compiler_options
*options
;
993 lower_constant(ir_constant
*ir
)
995 if (ir
->type
->is_array()) {
996 for (int i
= 0; i
< ir
->type
->array_size(); i
++)
997 lower_constant(ir
->get_array_element(i
));
999 ir
->type
= lower_glsl_type(ir
->type
);
1003 ir
->type
= lower_glsl_type(ir
->type
);
1004 ir_constant_data value
;
1006 if (ir
->type
->base_type
== GLSL_TYPE_FLOAT16
) {
1007 for (unsigned i
= 0; i
< ARRAY_SIZE(value
.f16
); i
++)
1008 value
.f16
[i
] = _mesa_float_to_half(ir
->value
.f
[i
]);
1009 } else if (ir
->type
->base_type
== GLSL_TYPE_INT16
) {
1010 for (unsigned i
= 0; i
< ARRAY_SIZE(value
.i16
); i
++)
1011 value
.i16
[i
] = ir
->value
.i
[i
];
1012 } else if (ir
->type
->base_type
== GLSL_TYPE_UINT16
) {
1013 for (unsigned i
= 0; i
< ARRAY_SIZE(value
.u16
); i
++)
1014 value
.u16
[i
] = ir
->value
.u
[i
];
1016 unreachable("invalid type");
1023 lower_variables_visitor::visit(ir_variable
*var
)
1025 if ((var
->data
.mode
!= ir_var_temporary
&&
1026 var
->data
.mode
!= ir_var_auto
) ||
1027 !var
->type
->without_array()->is_32bit() ||
1028 (var
->data
.precision
!= GLSL_PRECISION_MEDIUM
&&
1029 var
->data
.precision
!= GLSL_PRECISION_LOW
) ||
1030 !can_lower_type(options
, var
->type
))
1031 return visit_continue
;
1033 /* Lower constant initializers. */
1034 if (var
->constant_value
&&
1035 var
->type
== var
->constant_value
->type
) {
1036 if (!options
->LowerPrecisionConstants
)
1037 return visit_continue
;
1038 var
->constant_value
=
1039 var
->constant_value
->clone(ralloc_parent(var
), NULL
);
1040 lower_constant(var
->constant_value
);
1043 if (var
->constant_initializer
&&
1044 var
->type
== var
->constant_initializer
->type
) {
1045 if (!options
->LowerPrecisionConstants
)
1046 return visit_continue
;
1047 var
->constant_initializer
=
1048 var
->constant_initializer
->clone(ralloc_parent(var
), NULL
);
1049 lower_constant(var
->constant_initializer
);
1052 var
->type
= lower_glsl_type(var
->type
);
1053 _mesa_set_add(lower_vars
, var
);
1055 return visit_continue
;
1059 lower_variables_visitor::fix_types_in_deref_chain(ir_dereference
*ir
)
1061 assert(ir
->type
->without_array()->is_32bit());
1062 assert(_mesa_set_search(lower_vars
, ir
->variable_referenced()));
1064 /* Fix the type in the dereference node. */
1065 ir
->type
= lower_glsl_type(ir
->type
);
1067 /* If it's an array, fix the types in the whole dereference chain. */
1068 for (ir_dereference_array
*deref_array
= ir
->as_dereference_array();
1070 deref_array
= deref_array
->array
->as_dereference_array()) {
1071 assert(deref_array
->array
->type
->without_array()->is_32bit());
1072 deref_array
->array
->type
= lower_glsl_type(deref_array
->array
->type
);
1077 lower_variables_visitor::convert_split_assignment(ir_dereference
*lhs
,
1081 void *mem_ctx
= ralloc_parent(lhs
);
1083 if (lhs
->type
->is_array()) {
1084 for (unsigned i
= 0; i
< lhs
->type
->length
; i
++) {
1085 ir_dereference
*l
, *r
;
1087 l
= new(mem_ctx
) ir_dereference_array(lhs
->clone(mem_ctx
, NULL
),
1088 new(mem_ctx
) ir_constant(i
));
1089 r
= new(mem_ctx
) ir_dereference_array(rhs
->clone(mem_ctx
, NULL
),
1090 new(mem_ctx
) ir_constant(i
));
1091 convert_split_assignment(l
, r
, insert_before
);
1096 assert(lhs
->type
->is_16bit() || lhs
->type
->is_32bit());
1097 assert(rhs
->type
->is_16bit() || rhs
->type
->is_32bit());
1098 assert(lhs
->type
->is_16bit() != rhs
->type
->is_16bit());
1100 ir_assignment
*assign
=
1101 new(mem_ctx
) ir_assignment(lhs
, convert_precision(lhs
->type
->is_32bit(), rhs
));
1104 base_ir
->insert_before(assign
);
1106 base_ir
->insert_after(assign
);
1110 lower_variables_visitor::visit_enter(ir_assignment
*ir
)
1112 ir_dereference
*lhs
= ir
->lhs
;
1113 ir_variable
*var
= lhs
->variable_referenced();
1114 ir_dereference
*rhs_deref
= ir
->rhs
->as_dereference();
1115 ir_variable
*rhs_var
= rhs_deref
? rhs_deref
->variable_referenced() : NULL
;
1116 ir_constant
*rhs_const
= ir
->rhs
->as_constant();
1118 /* Legalize array assignments between lowered and non-lowered variables. */
1119 if (lhs
->type
->is_array() &&
1120 (rhs_var
|| rhs_const
) &&
1123 var
->type
->without_array()->is_16bit() !=
1124 rhs_var
->type
->without_array()->is_16bit())) &&
1127 var
->type
->without_array()->is_16bit() &&
1128 rhs_const
->type
->without_array()->is_32bit()))) {
1129 assert(ir
->rhs
->type
->is_array());
1131 /* Fix array assignments from lowered to non-lowered. */
1132 if (rhs_var
&& _mesa_set_search(lower_vars
, rhs_var
)) {
1133 fix_types_in_deref_chain(rhs_deref
);
1134 /* Convert to 32 bits for LHS. */
1135 convert_split_assignment(lhs
, rhs_deref
, true);
1137 return visit_continue
;
1140 /* Fix array assignments from non-lowered to lowered. */
1142 _mesa_set_search(lower_vars
, var
) &&
1143 ir
->rhs
->type
->without_array()->is_32bit()) {
1144 fix_types_in_deref_chain(lhs
);
1145 /* Convert to 16 bits for LHS. */
1146 convert_split_assignment(lhs
, ir
->rhs
, true);
1148 return visit_continue
;
1152 /* Fix assignment types. */
1154 _mesa_set_search(lower_vars
, var
)) {
1155 /* Fix the LHS type. */
1156 if (lhs
->type
->without_array()->is_32bit())
1157 fix_types_in_deref_chain(lhs
);
1159 /* Fix the RHS type if it's a lowered variable. */
1161 _mesa_set_search(lower_vars
, rhs_var
) &&
1162 rhs_deref
->type
->without_array()->is_32bit())
1163 fix_types_in_deref_chain(rhs_deref
);
1165 /* Fix the RHS type if it's a non-array expression. */
1166 if (ir
->rhs
->type
->is_32bit()) {
1167 ir_expression
*expr
= ir
->rhs
->as_expression();
1169 /* Convert the RHS to the LHS type. */
1171 (expr
->operation
== ir_unop_f162f
||
1172 expr
->operation
== ir_unop_i2i
||
1173 expr
->operation
== ir_unop_u2u
) &&
1174 expr
->operands
[0]->type
->is_16bit()) {
1175 /* If there is an "up" conversion, just remove it.
1176 * This is optional. We could as well execute the else statement and
1177 * let NIR eliminate the up+down conversions.
1179 ir
->rhs
= expr
->operands
[0];
1181 /* Add a "down" conversion operation to fix the type of RHS. */
1182 ir
->rhs
= convert_precision(false, ir
->rhs
);
1187 return ir_rvalue_enter_visitor::visit_enter(ir
);
1191 lower_variables_visitor::visit_enter(ir_return
*ir
)
1193 void *mem_ctx
= ralloc_parent(ir
);
1195 ir_dereference
*deref
= ir
->value
? ir
->value
->as_dereference() : NULL
;
1197 ir_variable
*var
= deref
->variable_referenced();
1199 /* Fix the type of the return value. */
1201 _mesa_set_search(lower_vars
, var
) &&
1202 deref
->type
->without_array()->is_32bit()) {
1203 /* Create a 32-bit temporary variable. */
1204 ir_variable
*new_var
=
1205 new(mem_ctx
) ir_variable(deref
->type
, "lowerp", ir_var_temporary
);
1206 base_ir
->insert_before(new_var
);
1208 /* Fix types in dereferences. */
1209 fix_types_in_deref_chain(deref
);
1211 /* Convert to 32 bits for the return value. */
1212 convert_split_assignment(new(mem_ctx
) ir_dereference_variable(new_var
),
1214 ir
->value
= new(mem_ctx
) ir_dereference_variable(new_var
);
1218 return ir_rvalue_enter_visitor::visit_enter(ir
);
1221 void lower_variables_visitor::handle_rvalue(ir_rvalue
**rvalue
)
1223 ir_rvalue
*ir
= *rvalue
;
1225 if (in_assignee
|| ir
== NULL
)
1228 ir_expression
*expr
= ir
->as_expression();
1229 ir_dereference
*expr_op0_deref
= expr
? expr
->operands
[0]->as_dereference() : NULL
;
1231 /* Remove f2fmp(float16). Same for int16 and uint16. */
1234 (expr
->operation
== ir_unop_f2fmp
||
1235 expr
->operation
== ir_unop_i2imp
||
1236 expr
->operation
== ir_unop_u2ump
||
1237 expr
->operation
== ir_unop_f2f16
||
1238 expr
->operation
== ir_unop_i2i
||
1239 expr
->operation
== ir_unop_u2u
) &&
1240 expr
->type
->without_array()->is_16bit() &&
1241 expr_op0_deref
->type
->without_array()->is_32bit() &&
1242 expr_op0_deref
->variable_referenced() &&
1243 _mesa_set_search(lower_vars
, expr_op0_deref
->variable_referenced())) {
1244 fix_types_in_deref_chain(expr_op0_deref
);
1246 /* Remove f2fmp/i2imp/u2ump. */
1247 *rvalue
= expr_op0_deref
;
1251 ir_dereference
*deref
= ir
->as_dereference();
1254 ir_variable
*var
= deref
->variable_referenced();
1256 /* var can be NULL if we are dereferencing ir_constant. */
1258 _mesa_set_search(lower_vars
, var
) &&
1259 deref
->type
->without_array()->is_32bit()) {
1260 void *mem_ctx
= ralloc_parent(ir
);
1262 /* Create a 32-bit temporary variable. */
1263 ir_variable
*new_var
=
1264 new(mem_ctx
) ir_variable(deref
->type
, "lowerp", ir_var_temporary
);
1265 base_ir
->insert_before(new_var
);
1267 /* Fix types in dereferences. */
1268 fix_types_in_deref_chain(deref
);
1270 /* Convert to 32 bits for the rvalue. */
1271 convert_split_assignment(new(mem_ctx
) ir_dereference_variable(new_var
),
1273 *rvalue
= new(mem_ctx
) ir_dereference_variable(new_var
);
1279 lower_variables_visitor::visit_enter(ir_call
*ir
)
1281 void *mem_ctx
= ralloc_parent(ir
);
1283 /* We can't pass 16-bit variables as 32-bit inout/out parameters. */
1284 foreach_two_lists(formal_node
, &ir
->callee
->parameters
,
1285 actual_node
, &ir
->actual_parameters
) {
1286 ir_dereference
*param_deref
=
1287 ((ir_rvalue
*)actual_node
)->as_dereference();
1288 ir_variable
*param
= (ir_variable
*)formal_node
;
1293 ir_variable
*var
= param_deref
->variable_referenced();
1295 /* var can be NULL if we are dereferencing ir_constant. */
1297 _mesa_set_search(lower_vars
, var
) &&
1298 param
->type
->without_array()->is_32bit()) {
1299 fix_types_in_deref_chain(param_deref
);
1301 /* Create a 32-bit temporary variable for the parameter. */
1302 ir_variable
*new_var
=
1303 new(mem_ctx
) ir_variable(param
->type
, "lowerp", ir_var_temporary
);
1304 base_ir
->insert_before(new_var
);
1306 /* Replace the parameter. */
1307 actual_node
->replace_with(new(mem_ctx
) ir_dereference_variable(new_var
));
1309 if (param
->data
.mode
== ir_var_function_in
||
1310 param
->data
.mode
== ir_var_function_inout
) {
1311 /* Convert to 32 bits for passing in. */
1312 convert_split_assignment(new(mem_ctx
) ir_dereference_variable(new_var
),
1313 param_deref
->clone(mem_ctx
, NULL
), true);
1315 if (param
->data
.mode
== ir_var_function_out
||
1316 param
->data
.mode
== ir_var_function_inout
) {
1317 /* Convert to 16 bits after returning. */
1318 convert_split_assignment(param_deref
,
1319 new(mem_ctx
) ir_dereference_variable(new_var
),
1325 /* Fix the type of return value dereferencies. */
1326 ir_dereference_variable
*ret_deref
= ir
->return_deref
;
1327 ir_variable
*ret_var
= ret_deref
? ret_deref
->variable_referenced() : NULL
;
1330 _mesa_set_search(lower_vars
, ret_var
) &&
1331 ret_deref
->type
->without_array()->is_32bit()) {
1332 /* Create a 32-bit temporary variable. */
1333 ir_variable
*new_var
=
1334 new(mem_ctx
) ir_variable(ir
->callee
->return_type
, "lowerp",
1336 base_ir
->insert_before(new_var
);
1338 /* Replace the return variable. */
1339 ret_deref
->var
= new_var
;
1341 /* Convert to 16 bits after returning. */
1342 convert_split_assignment(new(mem_ctx
) ir_dereference_variable(ret_var
),
1343 new(mem_ctx
) ir_dereference_variable(new_var
),
1347 return ir_rvalue_enter_visitor::visit_enter(ir
);
1353 lower_precision(const struct gl_shader_compiler_options
*options
,
1354 exec_list
*instructions
)
1356 find_precision_visitor
v(options
);
1357 find_lowerable_rvalues(options
, instructions
, v
.lowerable_rvalues
);
1358 visit_list_elements(&v
, instructions
);
1360 lower_variables_visitor
vars(options
);
1361 visit_list_elements(&vars
, instructions
);