/*
 * Copyright © 2019 Google, Inc
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_precision.cpp
 *
 * Lowers operations qualified as mediump/lowp to 16 bits when the driver
 * enables LowerPrecisionFloat16 / LowerPrecisionInt16.
 */

#include "main/macros.h"
#include "main/mtypes.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"
#include "util/half_float.h"
#include "util/set.h"
#include "util/hash_table.h"
#include <vector>

namespace {

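/* The pass works in two phases: find_lowerable_rvalues_visitor walks the IR
 * and collects the topmost rvalues whose precision qualifiers allow them to
 * be evaluated at 16 bits, then find_precision_visitor (with the help of
 * lower_precision_visitor) rewrites those sections to 16-bit types and
 * inserts conversions at their boundaries.
 */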
class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
   find_precision_visitor(const struct gl_shader_compiler_options *options);
   ~find_precision_visitor();

   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_call *ir);

   ir_function_signature *map_builtin(ir_function_signature *sig);

   /* Set of rvalues that can be lowered. This will be filled in by
    * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
    * will be added to this set.
    */
   struct set *lowerable_rvalues;

   /**
    * A mapping of builtin function signatures to lowered versions. This is
    * filled in lazily when a lowered version is needed.
    */
   struct hash_table *lowered_builtins;
   /**
    * A temporary hash table used only to clone functions.
    */
   struct hash_table *clone_ht;

   void *lowered_builtin_mem_ctx;

   const struct gl_shader_compiler_options *options;
};

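/* Walks the IR and decides, per rvalue, whether it can be lowered to 16 bits
 * based on the precision qualifiers of the values involved. Only the topmost
 * lowerable rvalues are added to the result set.
 */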
class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public:
   enum can_lower_state {
      UNKNOWN,
      CANT_LOWER,
      SHOULD_LOWER,
   };

   enum parent_relation {
      /* The parent performs a further operation involving the result from the
       * child and can be lowered along with it.
       */
      COMBINED_OPERATION,
      /* The parent instruction’s operation is independent of the child type so
       * the child should be lowered separately.
       */
      INDEPENDENT_OPERATION,
   };

   struct stack_entry {
      ir_instruction *instr;
      enum can_lower_state state;
      /* List of child rvalues that can be lowered. When this stack entry is
       * popped, if this node itself can’t be lowered then all of the children
       * are root nodes to lower so we will add them to lowerable_rvalues.
       * Otherwise if this node can also be lowered then we won’t add the
       * children because we only want to add the topmost lowerable nodes to
       * lowerable_rvalues and the children will be lowered as part of lowering
       * this node.
       */
      std::vector<ir_instruction *> lowerable_children;
   };

   find_lowerable_rvalues_visitor(struct set *result,
                                  const struct gl_shader_compiler_options *options);

   static void stack_enter(class ir_instruction *ir, void *data);
   static void stack_leave(class ir_instruction *ir, void *data);

   virtual ir_visitor_status visit(ir_constant *ir);
   virtual ir_visitor_status visit(ir_dereference_variable *ir);

   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_enter(ir_expression *ir);

   virtual ir_visitor_status visit_leave(ir_assignment *ir);
   virtual ir_visitor_status visit_leave(ir_call *ir);

   can_lower_state handle_precision(const glsl_type *type,
                                    int precision) const;

   static parent_relation get_parent_relation(ir_instruction *parent,
                                              ir_instruction *child);

   std::vector<stack_entry> stack;
   struct set *lowerable_rvalues;
   const struct gl_shader_compiler_options *options;

   void pop_stack_entry();
   void add_lowerable_children(const stack_entry &entry);
};

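/* Rewrites the rvalues inside a section that find_lowerable_rvalues_visitor
 * marked as lowerable so that they operate on 16-bit types.
 */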
class lower_precision_visitor : public ir_rvalue_visitor {
public:
   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_dereference_array *);
   virtual ir_visitor_status visit_enter(ir_dereference_record *);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_leave(ir_expression *);
};

static bool
can_lower_type(const struct gl_shader_compiler_options *options,
               const glsl_type *type)
{
   /* Don’t lower any expressions involving non-float types except bool and
    * texture samplers. This will rule out operations that change the type such
    * as conversion to ints. Instead it will end up lowering the arguments and
    * adding a final conversion to float32. We want to handle boolean types so
    * that it will do comparisons as 16-bit.
    */

   switch (type->base_type) {
   /* TODO: should we do anything for these two with regard to Int16 vs FP16
    * support?
    */
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
      return true;

   case GLSL_TYPE_FLOAT:
      return options->LowerPrecisionFloat16;

   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
      return options->LowerPrecisionInt16;

   default:
      return false;
   }
}

find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
                                   const struct gl_shader_compiler_options *opts)
{
   lowerable_rvalues = res;
   options = opts;
   callback_enter = stack_enter;
   callback_leave = stack_leave;
   data_enter = this;
   data_leave = this;
}

void
find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   /* Add a new stack entry for this instruction */
   stack_entry entry;

   entry.instr = ir;
   entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;

   state->stack.push_back(entry);
}

void
find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
{
   /* We can’t lower this node so if there were any pending children then they
    * are all root lowerable nodes and we should add them to the set.
    */
   for (auto &it : entry.lowerable_children)
      _mesa_set_add(lowerable_rvalues, it);
}

void
find_lowerable_rvalues_visitor::pop_stack_entry()
{
   const stack_entry &entry = stack.back();

   if (stack.size() >= 2) {
      /* Combine this state into the parent state, unless the parent operation
       * doesn’t have any relation to the child operations
       */
      stack_entry &parent = stack.end()[-2];
      parent_relation rel = get_parent_relation(parent.instr, entry.instr);

      if (rel == COMBINED_OPERATION) {
         switch (entry.state) {
         case CANT_LOWER:
            parent.state = CANT_LOWER;
            break;
         case SHOULD_LOWER:
            if (parent.state == UNKNOWN)
               parent.state = SHOULD_LOWER;
            break;
         case UNKNOWN:
            break;
         }
      }
   }

   if (entry.state == SHOULD_LOWER) {
      ir_rvalue *rv = entry.instr->as_rvalue();

      if (rv == NULL) {
         add_lowerable_children(entry);
      } else if (stack.size() >= 2) {
         stack_entry &parent = stack.end()[-2];

         switch (get_parent_relation(parent.instr, rv)) {
         case COMBINED_OPERATION:
            /* We only want to add the toplevel lowerable instructions to the
             * lowerable set. Therefore if there is a parent then instead of
             * adding this instruction to the set we will queue depending on
             * the result of the parent instruction.
             */
            parent.lowerable_children.push_back(entry.instr);
            break;
         case INDEPENDENT_OPERATION:
            _mesa_set_add(lowerable_rvalues, rv);
            break;
         }
      } else {
         /* This is a toplevel node so add it directly to the lowerable
          * set.
          */
         _mesa_set_add(lowerable_rvalues, rv);
      }
   } else if (entry.state == CANT_LOWER) {
      add_lowerable_children(entry);
   }

   stack.pop_back();
}

void
find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   state->pop_stack_entry();
}

enum find_lowerable_rvalues_visitor::can_lower_state
find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
                                                 int precision) const
{
   if (!can_lower_type(options, type))
      return CANT_LOWER;

   switch (precision) {
   case GLSL_PRECISION_NONE:
      return UNKNOWN;
   case GLSL_PRECISION_HIGH:
      return CANT_LOWER;
   case GLSL_PRECISION_MEDIUM:
   case GLSL_PRECISION_LOW:
      return SHOULD_LOWER;
   }

   return CANT_LOWER;
}

enum find_lowerable_rvalues_visitor::parent_relation
find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
                                                    ir_instruction *child)
{
   /* If the parent is a dereference instruction then the only child can be
    * something like an array index and that should be lowered independently
    * of the parent.
    */
   if (parent->as_dereference())
      return INDEPENDENT_OPERATION;

   /* The precision of texture sampling depends on the precision of the
    * sampler. The rest of the arguments don’t matter so we can treat it as an
    * independent operation.
    */
   if (parent->as_texture())
      return INDEPENDENT_OPERATION;

   return COMBINED_OPERATION;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_constant *ir)
{
   stack_enter(ir, this);

   if (!can_lower_type(options, ir->type))
      stack.back().state = CANT_LOWER;

   stack_leave(ir, this);

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
{
   stack_enter(ir, this);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   stack_leave(ir, this);

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN) {
      /* The precision of the sample value depends on the precision of the
       * sampler.
       */
      stack.back().state = handle_precision(ir->type,
                                            ir->sampler->precision());
   }

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (!can_lower_type(options, ir->type))
      stack.back().state = CANT_LOWER;

   /* Don't lower precision for derivative calculations */
   if (!options->LowerPrecisionDerivatives &&
       (ir->operation == ir_unop_dFdx ||
        ir->operation == ir_unop_dFdx_coarse ||
        ir->operation == ir_unop_dFdx_fine ||
        ir->operation == ir_unop_dFdy ||
        ir->operation == ir_unop_dFdy_coarse ||
        ir->operation == ir_unop_dFdy_fine)) {
      stack.back().state = CANT_LOWER;
   }

   return visit_continue;
}

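/* Decides whether the result of a builtin call can be treated as mediump:
 * for imageLoad the decision is based on the image format, for sampler
 * builtins on the sampler precision, and otherwise every argument must
 * itself be lowerable.
 */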
static bool
is_lowerable_builtin(ir_call *ir,
                     const struct set *lowerable_rvalues)
{
   /* The intrinsic call is inside the wrapper imageLoad function that will
    * be inlined. We have to handle both of them.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
       (ir->callee->is_builtin() &&
        !strcmp(ir->callee_name(), "imageLoad"))) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *resource = param->variable_referenced();

      assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
      assert(resource->type->without_array()->is_image());

      /* GLSL ES 3.20 requires that images have a precision modifier, but if
       * you set one, it doesn't do anything, because all intrinsics are
       * defined with highp. This seems to be a spec bug.
       *
       * In theory we could set the return value to mediump if the image
       * format has a lower precision. This appears to be the most sensible
       * thing to do.
       */
      const struct util_format_description *desc =
         util_format_description(resource->data.image_format);
      unsigned i =
         util_format_get_first_non_void_channel(resource->data.image_format);

      if (desc->channel[i].pure_integer ||
          desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
         return desc->channel[i].size <= 16;
      else
         return desc->channel[i].size <= 10; /* unorm/snorm */
   }

   /* Handle special calls. */
   if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *var = param->variable_referenced();

      /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
       * be inlined by lower_precision() if we return true here, so that we can
       * get to ir_texture later and do proper lowering.
       *
       * We should lower the type of the return value if the sampler type
       * uses lower precision. The function parameters don't matter.
       */
      if (var && var->type->without_array()->is_sampler()) {
         return var->data.precision == GLSL_PRECISION_MEDIUM ||
                var->data.precision == GLSL_PRECISION_LOW;
      }
   }

   if (!ir->callee->is_builtin())
      return false;

   assert(ir->callee->return_precision == GLSL_PRECISION_NONE);

   foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
      if (!param->as_constant() &&
          _mesa_set_search(lowerable_rvalues, param) == NULL)
         return false;
   }

   return true;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler
    * for function calls. If we assign to one of these using a function call
    * that has a lowerable return type then we can assume the temporary
    * variable should have a medium precision too.
    */

   /* Do nothing if the return type is void. */
   if (!ir->return_deref)
      return visit_continue;

   ir_variable *var = ir->return_deref->variable_referenced();

   assert(var->data.mode == ir_var_temporary);

   unsigned return_precision = ir->callee->return_precision;

   /* If the call is to a builtin, then the function won’t have a return
    * precision and we should determine it from the precision of the arguments.
    */
   if (is_lowerable_builtin(ir, lowerable_rvalues))
      return_precision = GLSL_PRECISION_MEDIUM;

   can_lower_state lower_state =
      handle_precision(var->type, return_precision);

   if (lower_state == SHOULD_LOWER) {
      /* There probably shouldn’t be any situations where multiple ir_call
       * instructions write to the same temporary?
       */
      assert(var->data.precision == GLSL_PRECISION_NONE);
      var->data.precision = GLSL_PRECISION_MEDIUM;
   } else {
      var->data.precision = GLSL_PRECISION_HIGH;
   }

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler.
    * If we assign to one of these using a lowered precision then we can assume
    * the temporary variable should have a medium precision too.
    */
   ir_variable *var = ir->lhs->variable_referenced();

   if (var->data.mode == ir_var_temporary) {
      if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
         /* Only override the precision if this is the first assignment. For
          * temporaries such as the ones generated for the ?: operator there
          * can be multiple assignments with different precisions. This way we
          * get the highest precision of all of the assignments.
          */
         if (var->data.precision == GLSL_PRECISION_NONE)
            var->data.precision = GLSL_PRECISION_MEDIUM;
      } else if (!ir->rhs->as_constant()) {
         var->data.precision = GLSL_PRECISION_HIGH;
      }
   }

   return visit_continue;
}

void
find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
                       exec_list *instructions,
                       struct set *result)
{
   find_lowerable_rvalues_visitor v(result, options);

   visit_list_elements(&v, instructions);

   assert(v.stack.empty());
}

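/* Returns the 16-bit variant of a 32-bit float/int/uint type, keeping the
 * vector/matrix shape.
 */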
static const glsl_type *
lower_glsl_type(const glsl_type *type)
{
   glsl_base_type new_base_type;

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      new_base_type = GLSL_TYPE_FLOAT16;
      break;
   case GLSL_TYPE_INT:
      new_base_type = GLSL_TYPE_INT16;
      break;
   case GLSL_TYPE_UINT:
      new_base_type = GLSL_TYPE_UINT16;
      break;
   default:
      unreachable("invalid type");
      return NULL;
   }

   return glsl_type::get_instance(new_base_type,
                                  type->vector_elements,
                                  type->matrix_columns,
                                  type->explicit_stride,
                                  type->interface_row_major);
}

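/* Wraps the rvalue in a conversion expression: down to the corresponding
 * 16-bit type (f2fmp/i2imp/u2ump) or, when "up" is set, back to 32 bits.
 */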
static ir_rvalue *
convert_precision(glsl_base_type type, bool up, ir_rvalue *ir)
{
   unsigned new_type, op;

   if (up) {
      switch (type) {
      case GLSL_TYPE_FLOAT16:
         new_type = GLSL_TYPE_FLOAT;
         op = ir_unop_f162f;
         break;
      case GLSL_TYPE_INT16:
         new_type = GLSL_TYPE_INT;
         op = ir_unop_i2i;
         break;
      case GLSL_TYPE_UINT16:
         new_type = GLSL_TYPE_UINT;
         op = ir_unop_u2u;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   } else {
      switch (type) {
      case GLSL_TYPE_FLOAT:
         new_type = GLSL_TYPE_FLOAT16;
         op = ir_unop_f2fmp;
         break;
      case GLSL_TYPE_INT:
         new_type = GLSL_TYPE_INT16;
         op = ir_unop_i2imp;
         break;
      case GLSL_TYPE_UINT:
         new_type = GLSL_TYPE_UINT16;
         op = ir_unop_u2ump;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   }

   const glsl_type *desired_type;
   desired_type = glsl_type::get_instance(new_type,
                                          ir->type->vector_elements,
                                          ir->type->matrix_columns);

   void *mem_ctx = ralloc_parent(ir);
   return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
}

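/* Variable dereferences are wrapped in an explicit down-conversion, while
 * other float/int/uint rvalues (including constants, whose data is rewritten
 * in place) simply have their type lowered.
 */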
void
lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (ir == NULL)
      return;

   if (ir->as_dereference()) {
      if (!ir->type->is_boolean())
         *rvalue = convert_precision(ir->type->base_type, false, ir);
   } else if (ir->type->base_type == GLSL_TYPE_FLOAT ||
              ir->type->base_type == GLSL_TYPE_INT ||
              ir->type->base_type == GLSL_TYPE_UINT) {
      ir->type = lower_glsl_type(ir->type);

      ir_constant *const_ir = ir->as_constant();

      if (const_ir) {
         ir_constant_data value;

         if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
               value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
         } else if (ir->type->base_type == GLSL_TYPE_INT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
               value.i16[i] = const_ir->value.i[i];
         } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
               value.u16[i] = const_ir->value.u[i];
         } else {
            unreachable("invalid type");
         }

         const_ir->value = value;
      }
   }
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_record *ir)
{
   /* We don’t want to lower the variable */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_array *ir)
{
   /* We don’t want to convert the array index or the variable. If the array
    * index itself is lowerable that will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_call *ir)
{
   /* We don’t want to convert the arguments. These will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_texture *ir)
{
   /* We don’t want to convert the arguments. These will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_leave(ir_expression *ir)
{
   ir_rvalue_visitor::visit_leave(ir);

   /* If the expression is a conversion operation to or from bool then fix the
    * operation.
    */
   switch (ir->operation) {
   case ir_unop_b2f:
      ir->operation = ir_unop_b2f16;
      break;
   case ir_unop_f2b:
      ir->operation = ir_unop_f162b;
      break;
   case ir_unop_b2i:
   case ir_unop_i2b:
      /* Nothing to do - they both support int16. */
      break;
   default:
      break;
   }

   return visit_continue;
}

void
find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   /* find_lowerable_rvalues_visitor has already determined which rvalues can
    * have their precision lowered and recorded them in lowerable_rvalues.
    * For each of those rvalues, lower_precision_visitor is run here to add
    * the f2fmp etc. conversions and rewrite the section to 16 bits.
    */
   if (*rvalue == NULL)
      return;

   struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);

   if (!entry)
      return;

   _mesa_set_remove(lowerable_rvalues, entry);

   /* If the entire expression is just a variable dereference then trying to
    * lower it will just directly add pointless to and from conversions without
    * any actual operation in-between. Although these will eventually get
    * optimised out, avoiding generating them here also avoids breaking inout
    * parameters to functions.
    */
   if ((*rvalue)->as_dereference())
      return;

   lower_precision_visitor v;

   (*rvalue)->accept(&v);
   v.handle_rvalue(rvalue);

   /* We don’t need to add the final conversion if the final type has been
    * converted to bool
    */
   if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL)
      *rvalue = convert_precision((*rvalue)->type->base_type, true, *rvalue);
}

ir_visitor_status
find_precision_visitor::visit_enter(ir_call *ir)
{
   ir_rvalue_enter_visitor::visit_enter(ir);

   ir_variable *return_var =
      ir->return_deref ? ir->return_deref->variable_referenced() : NULL;

   /* Don't do anything for image_load here. We have only changed the return
    * value to mediump/lowp, so that following instructions can use reduced
    * precision.
    *
    * The return value type of the intrinsic itself isn't changed here, but
    * can be changed in NIR if all users use the *2*mp opcode.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
      return visit_continue;

   /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
    * overrode the precision of the temporary return variable, then we can
    * replace the builtin implementation with a lowered version.
    */

   if (!ir->callee->is_builtin() ||
       return_var == NULL ||
       (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
        return_var->data.precision != GLSL_PRECISION_LOW))
      return visit_continue;

   ir->callee = map_builtin(ir->callee);
   ir->generate_inline(ir);
   ir->remove();

   return visit_continue_with_parent;
}

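/* Returns a clone of the builtin signature with mediump parameters whose body
 * has been run through lower_precision() recursively. Clones are cached in
 * lowered_builtins so each builtin is only lowered once.
 */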
ir_function_signature *
find_precision_visitor::map_builtin(ir_function_signature *sig)
{
   if (lowered_builtins == NULL) {
      lowered_builtins = _mesa_pointer_hash_table_create(NULL);
      clone_ht = _mesa_pointer_hash_table_create(NULL);
      lowered_builtin_mem_ctx = ralloc_context(NULL);
   } else {
      struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
      if (entry)
         return (ir_function_signature *) entry->data;
   }

   ir_function_signature *lowered_sig =
      sig->clone(lowered_builtin_mem_ctx, clone_ht);

   foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
      param->data.precision = GLSL_PRECISION_MEDIUM;
   }

   lower_precision(options, &lowered_sig->body);

   _mesa_hash_table_clear(clone_ht, NULL);

   _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);

   return lowered_sig;
}

find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
   : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
     lowered_builtins(NULL),
     clone_ht(NULL),
     lowered_builtin_mem_ctx(NULL),
     options(options)
{
}

find_precision_visitor::~find_precision_visitor()
{
   _mesa_set_destroy(lowerable_rvalues, NULL);

   if (lowered_builtins) {
      _mesa_hash_table_destroy(lowered_builtins, NULL);
      _mesa_hash_table_destroy(clone_ht, NULL);
      ralloc_free(lowered_builtin_mem_ctx);
   }
}

}

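/* Entry point: gather the lowerable rvalues for the whole instruction list,
 * then rewrite them to 16-bit operations.
 */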
void
lower_precision(const struct gl_shader_compiler_options *options,
                exec_list *instructions)
{
   find_precision_visitor v(options);

   find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);

   visit_list_elements(&v, instructions);
}