glsl: lower the precision of imageLoad
[mesa.git] / src / compiler / glsl / lower_precision.cpp
/*
 * Copyright © 2019 Google, Inc
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_precision.cpp
 */
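
/* High-level overview (a summary of the pass, not verbatim upstream
 * documentation): find_lowerable_rvalues_visitor walks the IR and collects
 * the root rvalues whose precision qualifiers (mediump/lowp) allow them to
 * be evaluated in 16 bits.  find_precision_visitor then rewrites each
 * collected rvalue with lower_precision_visitor, which retypes the
 * expression tree to float16/int16/uint16, and finally wraps the result in
 * a conversion back to the original 32-bit type.  For example (hypothetical
 * shader code):
 *
 *    mediump float a, b, c;
 *    ... a * b + c ...
 *
 * is turned into roughly
 *
 *    f162f(f2fmp(a) * f2fmp(b) + f2fmp(c))
 *
 * so the multiply-add itself happens at reduced precision.
 */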

#include "main/macros.h"
#include "main/mtypes.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"
#include "util/half_float.h"
#include "util/set.h"
#include "util/hash_table.h"
#include <vector>

namespace {

class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
   find_precision_visitor(const struct gl_shader_compiler_options *options);
   ~find_precision_visitor();

   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_call *ir);

   ir_function_signature *map_builtin(ir_function_signature *sig);

   bool progress;

   /* Set of rvalues that can be lowered. This will be filled in by
    * find_lowerable_rvalues_visitor. Only the root node of a lowerable
    * section will be added to this set.
    */
   struct set *lowerable_rvalues;

   /**
    * A mapping of builtin signature functions to lowered versions. This is
    * filled in lazily when a lowered version is needed.
    */
   struct hash_table *lowered_builtins;
   /**
    * A temporary hash table only used in order to clone functions.
    */
   struct hash_table *clone_ht;

   void *lowered_builtin_mem_ctx;

   const struct gl_shader_compiler_options *options;
};

class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public:
   enum can_lower_state {
      UNKNOWN,
      CANT_LOWER,
      SHOULD_LOWER,
   };

   enum parent_relation {
      /* The parent performs a further operation involving the result from
       * the child and can be lowered along with it.
       */
      COMBINED_OPERATION,
      /* The parent instruction’s operation is independent of the child type
       * so the child should be lowered separately.
       */
      INDEPENDENT_OPERATION,
   };
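
   /* Illustration (hypothetical examples): in "a + b" the addition is a
    * COMBINED_OPERATION with respect to its operands, so the operands and
    * the add are lowered together.  In "texture(s, coord)" or "arr[i]" the
    * resulting value does not depend on the precision of coord or i, so
    * those children are INDEPENDENT_OPERATIONs and get lowered on their
    * own.
    */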

   struct stack_entry {
      ir_instruction *instr;
      enum can_lower_state state;
      /* List of child rvalues that can be lowered. When this stack entry is
       * popped, if this node itself can’t be lowered then all of the
       * children are root nodes to lower, so we will add them to
       * lowerable_rvalues. Otherwise, if this node can also be lowered, we
       * won’t add the children, because we only want to add the topmost
       * lowerable nodes to lowerable_rvalues and the children will be
       * lowered as part of lowering this node.
       */
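      /* For instance (hypothetical shader code, not from the source tree):
       * given "mediump float r = (a * b) + c;" the whole rvalue
       * "(a * b) + c" is the root node that ends up in lowerable_rvalues,
       * while "a * b" only ever appears in its parent’s lowerable_children
       * list and is lowered as part of lowering the addition.
       */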
      std::vector<ir_instruction *> lowerable_children;
   };

   find_lowerable_rvalues_visitor(struct set *result,
                                  const struct gl_shader_compiler_options *options);
   bool can_lower_type(const glsl_type *type) const;

   static void stack_enter(class ir_instruction *ir, void *data);
   static void stack_leave(class ir_instruction *ir, void *data);

   virtual ir_visitor_status visit(ir_constant *ir);
   virtual ir_visitor_status visit(ir_dereference_variable *ir);

   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_enter(ir_expression *ir);

   virtual ir_visitor_status visit_leave(ir_assignment *ir);
   virtual ir_visitor_status visit_leave(ir_call *ir);

   can_lower_state handle_precision(const glsl_type *type,
                                    int precision) const;

   static parent_relation get_parent_relation(ir_instruction *parent,
                                              ir_instruction *child);

   std::vector<stack_entry> stack;
   struct set *lowerable_rvalues;
   const struct gl_shader_compiler_options *options;

   void pop_stack_entry();
   void add_lowerable_children(const stack_entry &entry);
};

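/* lower_precision_visitor does the actual rewrite of a single lowerable
 * rvalue tree: variable dereferences are wrapped in down-conversions,
 * intermediate expression and constant types are switched to their 16-bit
 * equivalents, and bool conversion opcodes are fixed up when leaving
 * expressions.
 */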
class lower_precision_visitor : public ir_rvalue_visitor {
public:
   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_dereference_array *);
   virtual ir_visitor_status visit_enter(ir_dereference_record *);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_leave(ir_expression *);
};

bool
find_lowerable_rvalues_visitor::can_lower_type(const glsl_type *type) const
{
   /* Don’t lower any expressions involving non-float types except bool and
    * texture samplers. This rules out operations that change the type, such
    * as conversions to int: for those we end up lowering the arguments
    * instead and adding a final conversion to float32. We do handle boolean
    * types so that comparisons are done at 16 bits.
    */
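   /* Example of the bool case (hypothetical): with "mediump float a, b" the
    * comparison "a < b" has a bool result, which can_lower_type() accepts,
    * so the whole comparison is lowered and evaluated on float16 operands
    * rather than being forced back to float32 first.
    */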

   switch (type->base_type) {
   /* TODO: should we do anything for these types with regard to Int16 vs
    * FP16 support?
    */
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
      return true;

   case GLSL_TYPE_FLOAT:
      return options->LowerPrecisionFloat16;

   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
      return options->LowerPrecisionInt16;

   default:
      return false;
   }
}

find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
                                                               const struct gl_shader_compiler_options *opts)
{
   lowerable_rvalues = res;
   options = opts;
   callback_enter = stack_enter;
   callback_leave = stack_leave;
   data_enter = this;
   data_leave = this;
}

void
find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   /* Add a new stack entry for this instruction */
   stack_entry entry;

   entry.instr = ir;
   entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;

   state->stack.push_back(entry);
}

void
find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
{
   /* We can’t lower this node so if there were any pending children then
    * they are all root lowerable nodes and we should add them to the set.
    */
   for (auto &it : entry.lowerable_children)
      _mesa_set_add(lowerable_rvalues, it);
}

void
find_lowerable_rvalues_visitor::pop_stack_entry()
{
   const stack_entry &entry = stack.back();

   if (stack.size() >= 2) {
      /* Combine this state into the parent state, unless the parent
       * operation doesn’t have any relation to the child operations.
       */
      stack_entry &parent = stack.end()[-2];
      parent_relation rel = get_parent_relation(parent.instr, entry.instr);

      if (rel == COMBINED_OPERATION) {
         switch (entry.state) {
         case CANT_LOWER:
            parent.state = CANT_LOWER;
            break;
         case SHOULD_LOWER:
            if (parent.state == UNKNOWN)
               parent.state = SHOULD_LOWER;
            break;
         case UNKNOWN:
            break;
         }
      }
   }

   if (entry.state == SHOULD_LOWER) {
      ir_rvalue *rv = entry.instr->as_rvalue();

      if (rv == NULL) {
         add_lowerable_children(entry);
      } else if (stack.size() >= 2) {
         stack_entry &parent = stack.end()[-2];

         switch (get_parent_relation(parent.instr, rv)) {
         case COMBINED_OPERATION:
            /* We only want to add the toplevel lowerable instructions to
             * the lowerable set. Therefore, if there is a parent, instead
             * of adding this instruction to the set we queue it on the
             * parent, so whether it gets added depends on the outcome for
             * the parent instruction.
             */
            parent.lowerable_children.push_back(entry.instr);
            break;
         case INDEPENDENT_OPERATION:
            _mesa_set_add(lowerable_rvalues, rv);
            break;
         }
      } else {
         /* This is a toplevel node so add it directly to the lowerable
          * set.
          */
         _mesa_set_add(lowerable_rvalues, rv);
      }
   } else if (entry.state == CANT_LOWER) {
      add_lowerable_children(entry);
   }

   stack.pop_back();
}

void
find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   state->pop_stack_entry();
}

enum find_lowerable_rvalues_visitor::can_lower_state
find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
                                                 int precision) const
{
   if (!can_lower_type(type))
      return CANT_LOWER;

   switch (precision) {
   case GLSL_PRECISION_NONE:
      return UNKNOWN;
   case GLSL_PRECISION_HIGH:
      return CANT_LOWER;
   case GLSL_PRECISION_MEDIUM:
   case GLSL_PRECISION_LOW:
      return SHOULD_LOWER;
   }

   return CANT_LOWER;
}

enum find_lowerable_rvalues_visitor::parent_relation
find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
                                                    ir_instruction *child)
{
   /* If the parent is a dereference instruction then the only child could
    * be, for example, an array dereference and that should be lowered
    * independently of the parent.
    */
   if (parent->as_dereference())
      return INDEPENDENT_OPERATION;

   /* The precision of texture sampling depends on the precision of the
    * sampler. The rest of the arguments don’t matter, so we can treat it as
    * an independent operation.
    */
   if (parent->as_texture())
      return INDEPENDENT_OPERATION;

   return COMBINED_OPERATION;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_constant *ir)
{
   stack_enter(ir, this);

   if (!can_lower_type(ir->type))
      stack.back().state = CANT_LOWER;

   stack_leave(ir, this);

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
{
   stack_enter(ir, this);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   stack_leave(ir, this);

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN) {
      /* The precision of the sample value depends on the precision of the
       * sampler.
       */
      stack.back().state = handle_precision(ir->type,
                                            ir->sampler->precision());
   }

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (!can_lower_type(ir->type))
      stack.back().state = CANT_LOWER;

   /* Don't lower precision for derivative calculations */
   if (!options->LowerPrecisionDerivatives &&
       (ir->operation == ir_unop_dFdx ||
        ir->operation == ir_unop_dFdx_coarse ||
        ir->operation == ir_unop_dFdx_fine ||
        ir->operation == ir_unop_dFdy ||
        ir->operation == ir_unop_dFdy_coarse ||
        ir->operation == ir_unop_dFdy_fine)) {
      stack.back().state = CANT_LOWER;
   }

   return visit_continue;
}

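/* Decide whether a builtin call’s result can be treated as mediump.  This is
 * used from visit_leave(ir_call) below: when it returns true, the
 * compiler-generated temporary receiving the return value is given mediump
 * precision, as if the callee had declared a mediump return type.
 */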
static bool
is_lowerable_builtin(ir_call *ir,
                     const struct set *lowerable_rvalues)
{
   /* The intrinsic call is inside the wrapper imageLoad function that will
    * be inlined. We have to handle both of them.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
       (ir->callee->is_builtin() &&
        !strcmp(ir->callee_name(), "imageLoad"))) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *resource = param->variable_referenced();

      assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
      assert(resource->type->without_array()->is_image());

      /* GLSL ES 3.20 requires that images have a precision modifier, but if
       * you set one, it doesn't do anything, because all intrinsics are
       * defined with highp. This seems to be a spec bug.
       *
       * In theory we could set the return value to mediump if the image
       * format has a lower precision. This appears to be the most sensible
       * thing to do.
       */
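      /* For example (illustrative): rgba8 (8-bit unorm) and rgba16f loads
       * pass the check below and can be treated as mediump, while r32f or
       * rgba32i loads cannot, and rgba16 (16-bit unorm) also stays highp,
       * presumably because a float16 cannot represent every 16-bit unorm
       * value exactly.
       */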
      const struct util_format_description *desc =
         util_format_description(resource->data.image_format);
      unsigned i =
         util_format_get_first_non_void_channel(resource->data.image_format);

      if (desc->channel[i].pure_integer ||
          desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
         return desc->channel[i].size <= 16;
      else
         return desc->channel[i].size <= 10; /* unorm/snorm */
   }

   if (!ir->callee->is_builtin())
      return false;

   assert(ir->callee->return_precision == GLSL_PRECISION_NONE);

   foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
      if (!param->as_constant() &&
          _mesa_set_search(lowerable_rvalues, param) == NULL)
         return false;
   }

   return true;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the
    * compiler for function calls. If we assign to one of these using a
    * function call that has a lowerable return type then we can assume the
    * temporary variable should have a medium precision too.
    */

   /* Do nothing if the return type is void. */
   if (!ir->return_deref)
      return visit_continue;

   ir_variable *var = ir->return_deref->variable_referenced();

   assert(var->data.mode == ir_var_temporary);

   unsigned return_precision = ir->callee->return_precision;

   /* If the call is to a builtin, then the function won’t have a return
    * precision and we should determine it from the precision of the
    * arguments.
    */
   if (is_lowerable_builtin(ir, lowerable_rvalues))
      return_precision = GLSL_PRECISION_MEDIUM;

   can_lower_state lower_state =
      handle_precision(var->type, return_precision);

   if (lower_state == SHOULD_LOWER) {
      /* There probably shouldn’t be any situations where multiple ir_call
       * instructions write to the same temporary?
       */
      assert(var->data.precision == GLSL_PRECISION_NONE);
      var->data.precision = GLSL_PRECISION_MEDIUM;
   } else {
      var->data.precision = GLSL_PRECISION_HIGH;
   }

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the
    * compiler. If we assign to one of these using a lowered precision then
    * we can assume the temporary variable should have a medium precision
    * too.
    */
   ir_variable *var = ir->lhs->variable_referenced();

   if (var->data.mode == ir_var_temporary) {
      if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
         /* Only override the precision if this is the first assignment. For
          * temporaries such as the ones generated for the ?: operator there
          * can be multiple assignments with different precisions. This way
          * we get the highest precision of all of the assignments.
          */
         if (var->data.precision == GLSL_PRECISION_NONE)
            var->data.precision = GLSL_PRECISION_MEDIUM;
      } else if (!ir->rhs->as_constant()) {
         var->data.precision = GLSL_PRECISION_HIGH;
      }
   }

   return visit_continue;
}

void
find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
                       exec_list *instructions,
                       struct set *result)
{
   find_lowerable_rvalues_visitor v(result, options);

   visit_list_elements(&v, instructions);

   assert(v.stack.empty());
}

static ir_rvalue *
convert_precision(glsl_base_type type, bool up, ir_rvalue *ir)
{
   unsigned new_type, op;

   if (up) {
      switch (type) {
      case GLSL_TYPE_FLOAT16:
         new_type = GLSL_TYPE_FLOAT;
         op = ir_unop_f162f;
         break;
      case GLSL_TYPE_INT16:
         new_type = GLSL_TYPE_INT;
         op = ir_unop_i2i;
         break;
      case GLSL_TYPE_UINT16:
         new_type = GLSL_TYPE_UINT;
         op = ir_unop_u2u;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   } else {
      switch (type) {
      case GLSL_TYPE_FLOAT:
         new_type = GLSL_TYPE_FLOAT16;
         op = ir_unop_f2fmp;
         break;
      case GLSL_TYPE_INT:
         new_type = GLSL_TYPE_INT16;
         op = ir_unop_i2imp;
         break;
      case GLSL_TYPE_UINT:
         new_type = GLSL_TYPE_UINT16;
         op = ir_unop_u2ump;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   }

   const glsl_type *desired_type;
   desired_type = glsl_type::get_instance(new_type,
                                          ir->type->vector_elements,
                                          ir->type->matrix_columns);

   void *mem_ctx = ralloc_parent(ir);
   return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
}
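
/* A call such as convert_precision(GLSL_TYPE_FLOAT, false, rv) therefore
 * wraps rv in an f2fmp expression whose type is a float16 vector/matrix of
 * the same shape, while passing up = true performs the matching conversion
 * back to the 32-bit type.
 */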

static glsl_base_type
lower_type(glsl_base_type type)
{
   switch (type) {
   case GLSL_TYPE_FLOAT:
      return GLSL_TYPE_FLOAT16;
   case GLSL_TYPE_INT:
      return GLSL_TYPE_INT16;
   case GLSL_TYPE_UINT:
      return GLSL_TYPE_UINT16;
   default:
      unreachable("invalid type");
      return GLSL_TYPE_ERROR;
   }
}

void
lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (ir == NULL)
      return;

   if (ir->as_dereference()) {
      if (!ir->type->is_boolean())
         *rvalue = convert_precision(ir->type->base_type, false, ir);
   } else if (ir->type->base_type == GLSL_TYPE_FLOAT ||
              ir->type->base_type == GLSL_TYPE_INT ||
              ir->type->base_type == GLSL_TYPE_UINT) {
      ir->type = glsl_type::get_instance(lower_type(ir->type->base_type),
                                         ir->type->vector_elements,
                                         ir->type->matrix_columns,
                                         ir->type->explicit_stride,
                                         ir->type->interface_row_major);

      ir_constant *const_ir = ir->as_constant();

      if (const_ir) {
         ir_constant_data value;

         if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
               value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
         } else if (ir->type->base_type == GLSL_TYPE_INT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
               value.i16[i] = const_ir->value.i[i];
         } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
               value.u16[i] = const_ir->value.u[i];
         } else {
            unreachable("invalid type");
         }

         const_ir->value = value;
      }
   }
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_record *ir)
{
   /* We don’t want to lower the variable */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_array *ir)
{
   /* We don’t want to convert the array index or the variable. If the array
    * index itself is lowerable that will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_call *ir)
{
   /* We don’t want to convert the arguments. These will be handled
    * separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_texture *ir)
{
   /* We don’t want to convert the arguments. These will be handled
    * separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_leave(ir_expression *ir)
{
   ir_rvalue_visitor::visit_leave(ir);

   /* If the expression is a conversion operation to or from bool then fix
    * the operation.
    */
   switch (ir->operation) {
   case ir_unop_b2f:
      ir->operation = ir_unop_b2f16;
      break;
   case ir_unop_f2b:
      ir->operation = ir_unop_f162b;
      break;
   case ir_unop_b2i:
   case ir_unop_i2b:
      /* Nothing to do - they both support int16. */
      break;
   default:
      break;
   }

   return visit_continue;
}

void
find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   /* find_lowerable_rvalues_visitor has already determined which rvalues
    * can have their precision lowered. For each rvalue in that set we run
    * lower_precision_visitor, which inserts the conversions (f2fmp, etc.)
    * and rewrites the types.
    */
   if (*rvalue == NULL)
      return;

   struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);

   if (!entry)
      return;

   _mesa_set_remove(lowerable_rvalues, entry);

   /* If the entire expression is just a variable dereference then trying to
    * lower it would just directly add pointless to-and-from conversions
    * without any actual operation in-between. Although these would
    * eventually get optimised out, avoiding generating them here also
    * avoids breaking inout parameters to functions.
    */
   if ((*rvalue)->as_dereference())
      return;

   lower_precision_visitor v;

   (*rvalue)->accept(&v);
   v.handle_rvalue(rvalue);

   /* We don’t need to add the final conversion if the final type has been
    * converted to bool.
    */
   if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL)
      *rvalue = convert_precision((*rvalue)->type->base_type, true, *rvalue);

   progress = true;
}

ir_visitor_status
find_precision_visitor::visit_enter(ir_call *ir)
{
   ir_rvalue_enter_visitor::visit_enter(ir);

   ir_variable *return_var =
      ir->return_deref ? ir->return_deref->variable_referenced() : NULL;

   /* Don't do anything for image_load here. We have only changed the return
    * value to mediump/lowp, so that following instructions can use reduced
    * precision.
    *
    * The return value type of the intrinsic itself isn't changed here, but
    * can be changed in NIR if all users use the *2*mp opcode.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
      return visit_continue;

   /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
    * overrode the precision of the temporary return variable, then we can
    * replace the builtin implementation with a lowered version.
    */

   if (!ir->callee->is_builtin() ||
       return_var == NULL ||
       (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
        return_var->data.precision != GLSL_PRECISION_LOW))
      return visit_continue;

   ir->callee = map_builtin(ir->callee);
   ir->generate_inline(ir);
   ir->remove();

   return visit_continue_with_parent;
}

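/* Return (and cache) a clone of the given builtin signature whose parameters
 * are forced to mediump and whose body has itself been run through
 * lower_precision(), so that inlined builtins also execute at reduced
 * precision.
 */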
ir_function_signature *
find_precision_visitor::map_builtin(ir_function_signature *sig)
{
   if (lowered_builtins == NULL) {
      lowered_builtins = _mesa_pointer_hash_table_create(NULL);
      clone_ht = _mesa_pointer_hash_table_create(NULL);
      lowered_builtin_mem_ctx = ralloc_context(NULL);
   } else {
      struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
      if (entry)
         return (ir_function_signature *) entry->data;
   }

   ir_function_signature *lowered_sig =
      sig->clone(lowered_builtin_mem_ctx, clone_ht);

   foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
      param->data.precision = GLSL_PRECISION_MEDIUM;
   }

   lower_precision(options, &lowered_sig->body);

   _mesa_hash_table_clear(clone_ht, NULL);

   _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);

   return lowered_sig;
}

find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
   : progress(false),
     lowerable_rvalues(_mesa_pointer_set_create(NULL)),
     lowered_builtins(NULL),
     clone_ht(NULL),
     lowered_builtin_mem_ctx(NULL),
     options(options)
{
}

find_precision_visitor::~find_precision_visitor()
{
   _mesa_set_destroy(lowerable_rvalues, NULL);

   if (lowered_builtins) {
      _mesa_hash_table_destroy(lowered_builtins, NULL);
      _mesa_hash_table_destroy(clone_ht, NULL);
      ralloc_free(lowered_builtin_mem_ctx);
   }
}

}

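/* Entry point for the pass: gather the lowerable rvalues for the given
 * instruction list and then rewrite them in place. Returns true if any
 * rvalue was lowered.
 */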
bool
lower_precision(const struct gl_shader_compiler_options *options,
                exec_list *instructions)
{
   find_precision_visitor v(options);

   find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);

   visit_list_elements(&v, instructions);

   return v.progress;
}