glsl: convert reusable lower_precision util code into helper functions
[mesa.git] / src / compiler / glsl / lower_precision.cpp
/*
 * Copyright © 2019 Google, Inc
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_precision.cpp
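 *
 * Lowers the precision of operations on mediump/lowp variables to 16-bit
 * float/int/uint types where the driver's gl_shader_compiler_options allow it.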
 */

#include "main/macros.h"
#include "main/mtypes.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"
#include "util/half_float.h"
#include "util/set.h"
#include "util/hash_table.h"
#include <vector>

namespace {

class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
   find_precision_visitor(const struct gl_shader_compiler_options *options);
   ~find_precision_visitor();

   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_call *ir);

   ir_function_signature *map_builtin(ir_function_signature *sig);

   bool progress;

   /* Set of rvalues that can be lowered. This will be filled in by
    * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
    * will be added to this set.
    */
   struct set *lowerable_rvalues;

   /**
    * A mapping of builtin signature functions to lowered versions. This is
    * filled in lazily when a lowered version is needed.
    */
   struct hash_table *lowered_builtins;
   /**
    * A temporary hash table only used in order to clone functions.
    */
   struct hash_table *clone_ht;

   void *lowered_builtin_mem_ctx;

   const struct gl_shader_compiler_options *options;
};

class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public:
   enum can_lower_state {
      UNKNOWN,
      CANT_LOWER,
      SHOULD_LOWER,
   };

   enum parent_relation {
      /* The parent performs a further operation involving the result from the
       * child and can be lowered along with it.
       */
      COMBINED_OPERATION,
      /* The parent instruction’s operation is independent of the child type so
       * the child should be lowered separately.
       */
      INDEPENDENT_OPERATION,
   };

   struct stack_entry {
      ir_instruction *instr;
      enum can_lower_state state;
      /* List of child rvalues that can be lowered. When this stack entry is
       * popped, if this node itself can’t be lowered then all of the children
       * are root nodes to lower so we will add them to lowerable_rvalues.
       * Otherwise if this node can also be lowered then we won’t add the
       * children because we only want to add the topmost lowerable nodes to
       * lowerable_rvalues and the children will be lowered as part of lowering
       * this node.
       */
      std::vector<ir_instruction *> lowerable_children;
   };

   find_lowerable_rvalues_visitor(struct set *result,
                                  const struct gl_shader_compiler_options *options);

   static void stack_enter(class ir_instruction *ir, void *data);
   static void stack_leave(class ir_instruction *ir, void *data);

   virtual ir_visitor_status visit(ir_constant *ir);
   virtual ir_visitor_status visit(ir_dereference_variable *ir);

   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_enter(ir_expression *ir);

   virtual ir_visitor_status visit_leave(ir_assignment *ir);
   virtual ir_visitor_status visit_leave(ir_call *ir);

   can_lower_state handle_precision(const glsl_type *type,
                                    int precision) const;

   static parent_relation get_parent_relation(ir_instruction *parent,
                                              ir_instruction *child);

   std::vector<stack_entry> stack;
   struct set *lowerable_rvalues;
   const struct gl_shader_compiler_options *options;

   void pop_stack_entry();
   void add_lowerable_children(const stack_entry &entry);
};

class lower_precision_visitor : public ir_rvalue_visitor {
public:
   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_dereference_array *);
   virtual ir_visitor_status visit_enter(ir_dereference_record *);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_leave(ir_expression *);
};

static bool
can_lower_type(const struct gl_shader_compiler_options *options,
               const glsl_type *type)
{
   /* Don’t lower any expressions involving non-float types except bool and
    * texture samplers. This will rule out operations that change the type such
    * as conversion to ints. Instead it will end up lowering the arguments and
    * adding a final conversion to float32. We want to handle
    * boolean types so that it will do comparisons as 16-bit.
    */

   switch (type->base_type) {
   /* TODO: should we do anything for these types with regard to Int16 vs FP16
    * support?
    */
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
      return true;

   case GLSL_TYPE_FLOAT:
      return options->LowerPrecisionFloat16;

   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
      return options->LowerPrecisionInt16;

   default:
      return false;
   }
}

find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
                                const struct gl_shader_compiler_options *opts)
{
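   /* Hook the hierarchical visitor's enter/leave callbacks so that a stack
    * entry tracking the lowerable state is pushed and popped around every
    * visited instruction.
    */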
   lowerable_rvalues = res;
   options = opts;
   callback_enter = stack_enter;
   callback_leave = stack_leave;
   data_enter = this;
   data_leave = this;
}

void
find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   /* Add a new stack entry for this instruction */
   stack_entry entry;

   entry.instr = ir;
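   /* Anything visited as part of an assignment target is never lowered
    * directly; its precision comes from the variable being written.
    */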
   entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;

   state->stack.push_back(entry);
}

void
find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
{
   /* We can’t lower this node so if there were any pending children then they
    * are all root lowerable nodes and we should add them to the set.
    */
   for (auto &it : entry.lowerable_children)
      _mesa_set_add(lowerable_rvalues, it);
}

void
find_lowerable_rvalues_visitor::pop_stack_entry()
{
   const stack_entry &entry = stack.back();

   if (stack.size() >= 2) {
      /* Combine this state into the parent state, unless the parent operation
       * doesn’t have any relation to the child operations
       */
      stack_entry &parent = stack.end()[-2];
      parent_relation rel = get_parent_relation(parent.instr, entry.instr);

      if (rel == COMBINED_OPERATION) {
         switch (entry.state) {
         case CANT_LOWER:
            parent.state = CANT_LOWER;
            break;
         case SHOULD_LOWER:
            if (parent.state == UNKNOWN)
               parent.state = SHOULD_LOWER;
            break;
         case UNKNOWN:
            break;
         }
      }
   }

   if (entry.state == SHOULD_LOWER) {
      ir_rvalue *rv = entry.instr->as_rvalue();

      if (rv == NULL) {
         add_lowerable_children(entry);
      } else if (stack.size() >= 2) {
         stack_entry &parent = stack.end()[-2];

         switch (get_parent_relation(parent.instr, rv)) {
         case COMBINED_OPERATION:
            /* We only want to add the toplevel lowerable instructions to the
             * lowerable set. Therefore if there is a parent then instead of
             * adding this instruction to the set we will queue depending on
             * the result of the parent instruction.
             */
            parent.lowerable_children.push_back(entry.instr);
            break;
         case INDEPENDENT_OPERATION:
            _mesa_set_add(lowerable_rvalues, rv);
            break;
         }
      } else {
         /* This is a toplevel node so add it directly to the lowerable
          * set.
          */
         _mesa_set_add(lowerable_rvalues, rv);
      }
   } else if (entry.state == CANT_LOWER) {
      add_lowerable_children(entry);
   }

   stack.pop_back();
}

void
find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   state->pop_stack_entry();
}

enum find_lowerable_rvalues_visitor::can_lower_state
find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
                                                 int precision) const
{
   if (!can_lower_type(options, type))
      return CANT_LOWER;

   switch (precision) {
   case GLSL_PRECISION_NONE:
      return UNKNOWN;
   case GLSL_PRECISION_HIGH:
      return CANT_LOWER;
   case GLSL_PRECISION_MEDIUM:
   case GLSL_PRECISION_LOW:
      return SHOULD_LOWER;
   }

   return CANT_LOWER;
}

enum find_lowerable_rvalues_visitor::parent_relation
find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
                                                    ir_instruction *child)
{
   /* If the parent is a dereference instruction then the only child could be
    * for example an array dereference and that should be lowered independently
    * of the parent.
    */
   if (parent->as_dereference())
      return INDEPENDENT_OPERATION;

   /* The precision of texture sampling depends on the precision of the sampler.
    * The rest of the arguments don’t matter so we can treat it as an
    * independent operation.
    */
   if (parent->as_texture())
      return INDEPENDENT_OPERATION;

   return COMBINED_OPERATION;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_constant *ir)
{
   stack_enter(ir, this);

   if (!can_lower_type(options, ir->type))
      stack.back().state = CANT_LOWER;

   stack_leave(ir, this);

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
{
   stack_enter(ir, this);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   stack_leave(ir, this);

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN) {
      /* The precision of the sample value depends on the precision of the
       * sampler.
       */
      stack.back().state = handle_precision(ir->type,
                                            ir->sampler->precision());
   }

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (!can_lower_type(options, ir->type))
      stack.back().state = CANT_LOWER;

   /* Don't lower precision for derivative calculations */
   if (!options->LowerPrecisionDerivatives &&
       (ir->operation == ir_unop_dFdx ||
        ir->operation == ir_unop_dFdx_coarse ||
        ir->operation == ir_unop_dFdx_fine ||
        ir->operation == ir_unop_dFdy ||
        ir->operation == ir_unop_dFdy_coarse ||
        ir->operation == ir_unop_dFdy_fine)) {
      stack.back().state = CANT_LOWER;
   }

   return visit_continue;
}

static bool
is_lowerable_builtin(ir_call *ir,
                     const struct set *lowerable_rvalues)
{
   /* The intrinsic call is inside the wrapper imageLoad function that will
    * be inlined. We have to handle both of them.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
       (ir->callee->is_builtin() &&
        !strcmp(ir->callee_name(), "imageLoad"))) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *resource = param->variable_referenced();

      assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
      assert(resource->type->without_array()->is_image());

      /* GLSL ES 3.20 requires that images have a precision modifier, but if
       * you set one, it doesn't do anything, because all intrinsics are
       * defined with highp. This seems to be a spec bug.
       *
       * In theory we could set the return value to mediump if the image
       * format has a lower precision. This appears to be the most sensible
       * thing to do.
       */
      const struct util_format_description *desc =
         util_format_description(resource->data.image_format);
      unsigned i =
         util_format_get_first_non_void_channel(resource->data.image_format);

      if (desc->channel[i].pure_integer ||
          desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
         return desc->channel[i].size <= 16;
      else
         return desc->channel[i].size <= 10; /* unorm/snorm */
   }

   /* Handle special calls. */
   if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *var = param->variable_referenced();

      /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
       * be inlined by lower_precision() if we return true here, so that we can
       * get to ir_texture later and do proper lowering.
       *
       * We should lower the type of the return value if the sampler type
       * uses lower precision. The function parameters don't matter.
       */
      if (var && var->type->without_array()->is_sampler()) {
         return var->data.precision == GLSL_PRECISION_MEDIUM ||
                var->data.precision == GLSL_PRECISION_LOW;
      }
   }

   if (!ir->callee->is_builtin())
      return false;

   assert(ir->callee->return_precision == GLSL_PRECISION_NONE);

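   /* For any other builtin, lower only if every non-constant argument has
    * itself been marked as lowerable.
    */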
   foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
      if (!param->as_constant() &&
          _mesa_set_search(lowerable_rvalues, param) == NULL)
         return false;
   }

   return true;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler
    * for function calls. If we assign to one of these using a function call
    * that has a lowerable return type then we can assume the temporary
    * variable should have a medium precision too.
    */

   /* Do nothing if the return type is void. */
   if (!ir->return_deref)
      return visit_continue;

   ir_variable *var = ir->return_deref->variable_referenced();

   assert(var->data.mode == ir_var_temporary);

   unsigned return_precision = ir->callee->return_precision;

   /* If the call is to a builtin, then the function won’t have a return
    * precision and we should determine it from the precision of the arguments.
    */
   if (is_lowerable_builtin(ir, lowerable_rvalues))
      return_precision = GLSL_PRECISION_MEDIUM;

   can_lower_state lower_state =
      handle_precision(var->type, return_precision);

   if (lower_state == SHOULD_LOWER) {
      /* There probably shouldn’t be any situations where multiple ir_call
       * instructions write to the same temporary?
       */
      assert(var->data.precision == GLSL_PRECISION_NONE);
      var->data.precision = GLSL_PRECISION_MEDIUM;
   } else {
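      /* The call result can't be lowered, so force the temporary to keep
       * full precision.
       */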
      var->data.precision = GLSL_PRECISION_HIGH;
   }

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler.
    * If we assign to one of these using a lowered precision then we can assume
    * the temporary variable should have a medium precision too.
    */
   ir_variable *var = ir->lhs->variable_referenced();

   if (var->data.mode == ir_var_temporary) {
      if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
         /* Only override the precision if this is the first assignment. For
          * temporaries such as the ones generated for the ?: operator there
          * can be multiple assignments with different precisions. This way we
          * get the highest precision of all of the assignments.
          */
         if (var->data.precision == GLSL_PRECISION_NONE)
            var->data.precision = GLSL_PRECISION_MEDIUM;
      } else if (!ir->rhs->as_constant()) {
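         /* A non-constant source that isn't lowerable forces the temporary
          * to highp.
          */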
         var->data.precision = GLSL_PRECISION_HIGH;
      }
   }

   return visit_continue;
}

void
find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
                       exec_list *instructions,
                       struct set *result)
{
   find_lowerable_rvalues_visitor v(result, options);

   visit_list_elements(&v, instructions);

   assert(v.stack.empty());
}

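/* Return the 16-bit variant of a 32-bit float/int/uint type, preserving the
 * vector and matrix dimensions of the original type.
 */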
static const glsl_type *
lower_glsl_type(const glsl_type *type)
{
   glsl_base_type new_base_type;

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      new_base_type = GLSL_TYPE_FLOAT16;
      break;
   case GLSL_TYPE_INT:
      new_base_type = GLSL_TYPE_INT16;
      break;
   case GLSL_TYPE_UINT:
      new_base_type = GLSL_TYPE_UINT16;
      break;
   default:
      unreachable("invalid type");
      return NULL;
   }

   return glsl_type::get_instance(new_base_type,
                                  type->vector_elements,
                                  type->matrix_columns,
                                  type->explicit_stride,
                                  type->interface_row_major);
}

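/* Wrap ir in a conversion expression: down-convert a 32-bit value to the
 * corresponding 16-bit "mp" type, or up-convert a 16-bit value back to
 * 32 bits.
 */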
static ir_rvalue *
convert_precision(glsl_base_type type, bool up, ir_rvalue *ir)
{
   unsigned new_type, op;

   if (up) {
      switch (type) {
      case GLSL_TYPE_FLOAT16:
         new_type = GLSL_TYPE_FLOAT;
         op = ir_unop_f162f;
         break;
      case GLSL_TYPE_INT16:
         new_type = GLSL_TYPE_INT;
         op = ir_unop_i2i;
         break;
      case GLSL_TYPE_UINT16:
         new_type = GLSL_TYPE_UINT;
         op = ir_unop_u2u;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   } else {
      switch (type) {
      case GLSL_TYPE_FLOAT:
         new_type = GLSL_TYPE_FLOAT16;
         op = ir_unop_f2fmp;
         break;
      case GLSL_TYPE_INT:
         new_type = GLSL_TYPE_INT16;
         op = ir_unop_i2imp;
         break;
      case GLSL_TYPE_UINT:
         new_type = GLSL_TYPE_UINT16;
         op = ir_unop_u2ump;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   }

   const glsl_type *desired_type;
   desired_type = glsl_type::get_instance(new_type,
                                          ir->type->vector_elements,
                                          ir->type->matrix_columns);

   void *mem_ctx = ralloc_parent(ir);
   return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
}

void
lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (ir == NULL)
      return;

   if (ir->as_dereference()) {
      if (!ir->type->is_boolean())
         *rvalue = convert_precision(ir->type->base_type, false, ir);
   } else if (ir->type->base_type == GLSL_TYPE_FLOAT ||
              ir->type->base_type == GLSL_TYPE_INT ||
              ir->type->base_type == GLSL_TYPE_UINT) {
      ir->type = lower_glsl_type(ir->type);

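      /* Constants store their data inline, so the values also have to be
       * re-encoded for the new 16-bit type.
       */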
      ir_constant *const_ir = ir->as_constant();

      if (const_ir) {
         ir_constant_data value;

         if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
               value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
         } else if (ir->type->base_type == GLSL_TYPE_INT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
               value.i16[i] = const_ir->value.i[i];
         } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
               value.u16[i] = const_ir->value.u[i];
         } else {
            unreachable("invalid type");
         }

         const_ir->value = value;
      }
   }
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_record *ir)
{
   /* We don’t want to lower the variable */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_array *ir)
{
   /* We don’t want to convert the array index or the variable. If the array
    * index itself is lowerable that will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_call *ir)
{
   /* We don’t want to convert the arguments. These will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_texture *ir)
{
   /* We don’t want to convert the arguments. These will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_leave(ir_expression *ir)
{
   ir_rvalue_visitor::visit_leave(ir);

   /* If the expression is a conversion operation to or from bool then fix the
    * operation.
    */
   switch (ir->operation) {
   case ir_unop_b2f:
      ir->operation = ir_unop_b2f16;
      break;
   case ir_unop_f2b:
      ir->operation = ir_unop_f162b;
      break;
   case ir_unop_b2i:
   case ir_unop_i2b:
      /* Nothing to do - they both support int16. */
      break;
   default:
      break;
   }

   return visit_continue;
}

void
find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   /* find_lowerable_rvalues_visitor has already checked which rvalues can be
    * lowered and recorded them in lowerable_rvalues.
    * Once an rvalue is known to be lowerable, we add the f2fmp, etc.
    * conversions here through lower_precision_visitor.
    */
   if (*rvalue == NULL)
      return;

   struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);

   if (!entry)
      return;

   _mesa_set_remove(lowerable_rvalues, entry);

   /* If the entire expression is just a variable dereference then trying to
    * lower it will just directly add pointless to and from conversions without
    * any actual operation in-between. Although these will eventually get
    * optimised out, avoiding generating them here also avoids breaking inout
    * parameters to functions.
    */
   if ((*rvalue)->as_dereference())
      return;

   lower_precision_visitor v;

   (*rvalue)->accept(&v);
   v.handle_rvalue(rvalue);

   /* We don’t need to add the final conversion if the final type has been
    * converted to bool
    */
   if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL)
      *rvalue = convert_precision((*rvalue)->type->base_type, true, *rvalue);

   progress = true;
}

ir_visitor_status
find_precision_visitor::visit_enter(ir_call *ir)
{
   ir_rvalue_enter_visitor::visit_enter(ir);

   ir_variable *return_var =
      ir->return_deref ? ir->return_deref->variable_referenced() : NULL;

   /* Don't do anything for image_load here. We have only changed the return
    * value to mediump/lowp, so that following instructions can use reduced
    * precision.
    *
    * The return value type of the intrinsic itself isn't changed here, but
    * can be changed in NIR if all users use the *2*mp opcode.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
      return visit_continue;

   /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
    * overrode the precision of the temporary return variable, then we can
    * replace the builtin implementation with a lowered version.
    */

   if (!ir->callee->is_builtin() ||
       return_var == NULL ||
       (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
        return_var->data.precision != GLSL_PRECISION_LOW))
      return visit_continue;

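   /* Swap in a lowered clone of the builtin signature and inline it in place
    * of the original call.
    */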
   ir->callee = map_builtin(ir->callee);
   ir->generate_inline(ir);
   ir->remove();

   return visit_continue_with_parent;
}

ir_function_signature *
find_precision_visitor::map_builtin(ir_function_signature *sig)
{
   if (lowered_builtins == NULL) {
      lowered_builtins = _mesa_pointer_hash_table_create(NULL);
      clone_ht = _mesa_pointer_hash_table_create(NULL);
      lowered_builtin_mem_ctx = ralloc_context(NULL);
   } else {
      struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
      if (entry)
         return (ir_function_signature *) entry->data;
   }

   ir_function_signature *lowered_sig =
      sig->clone(lowered_builtin_mem_ctx, clone_ht);

   foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
      param->data.precision = GLSL_PRECISION_MEDIUM;
   }

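   /* Recursively lower the cloned body now that its parameters are marked
    * mediump.
    */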
   lower_precision(options, &lowered_sig->body);

   _mesa_hash_table_clear(clone_ht, NULL);

   _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);

   return lowered_sig;
}

find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
   : progress(false),
     lowerable_rvalues(_mesa_pointer_set_create(NULL)),
     lowered_builtins(NULL),
     clone_ht(NULL),
     lowered_builtin_mem_ctx(NULL),
     options(options)
{
}

find_precision_visitor::~find_precision_visitor()
{
   _mesa_set_destroy(lowerable_rvalues, NULL);

   if (lowered_builtins) {
      _mesa_hash_table_destroy(lowered_builtins, NULL);
      _mesa_hash_table_destroy(clone_ht, NULL);
      ralloc_free(lowered_builtin_mem_ctx);
   }
}

}

bool
lower_precision(const struct gl_shader_compiler_options *options,
                exec_list *instructions)
{
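   /* First find and mark the rvalues that can be lowered, then rewrite them
    * with 16-bit types and the matching conversions.
    */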
   find_precision_visitor v(options);

   find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);

   visit_list_elements(&v, instructions);

   return v.progress;
}