spirv: add ReadClockKHR support with device scope
[mesa.git] / src / compiler / glsl / lower_precision.cpp
1 /*
2 * Copyright © 2019 Google, Inc
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file lower_precision.cpp
26 */
27
28 #include "main/macros.h"
29 #include "compiler/glsl_types.h"
30 #include "ir.h"
31 #include "ir_builder.h"
32 #include "ir_optimization.h"
33 #include "ir_rvalue_visitor.h"
34 #include "util/half_float.h"
35 #include "util/set.h"
36 #include "util/hash_table.h"
37 #include <vector>
38
39 namespace {
40
41 class find_precision_visitor : public ir_rvalue_enter_visitor {
42 public:
43 find_precision_visitor();
44 ~find_precision_visitor();
45
46 virtual void handle_rvalue(ir_rvalue **rvalue);
47 virtual ir_visitor_status visit_enter(ir_call *ir);
48
49 ir_function_signature *map_builtin(ir_function_signature *sig);
50
51 bool progress;
52
53 /* Set of rvalues that can be lowered. This will be filled in by
54 * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
55 * will be added to this set.
56 */
57 struct set *lowerable_rvalues;
58
59 /**
60 * A mapping of builtin signature functions to lowered versions. This is
61 * filled in lazily when a lowered version is needed.
62 */
63 struct hash_table *lowered_builtins;
64 /**
65 * A temporary hash table only used in order to clone functions.
66 */
67 struct hash_table *clone_ht;
68
69 void *lowered_builtin_mem_ctx;
70 };
71
72 class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
73 public:
74 enum can_lower_state {
75 UNKNOWN,
76 CANT_LOWER,
77 SHOULD_LOWER,
78 };
79
80 enum parent_relation {
81 /* The parent performs a further operation involving the result from the
82 * child and can be lowered along with it.
83 */
84 COMBINED_OPERATION,
85 /* The parent instruction’s operation is independent of the child type so
86 * the child should be lowered separately.
87 */
88 INDEPENDENT_OPERATION,
89 };
90
91 struct stack_entry {
92 ir_instruction *instr;
93 enum can_lower_state state;
94 /* List of child rvalues that can be lowered. When this stack entry is
95 * popped, if this node itself can’t be lowered than all of the children
96 * are root nodes to lower so we will add them to lowerable_rvalues.
97 * Otherwise if this node can also be lowered then we won’t add the
98 * children because we only want to add the topmost lowerable nodes to
99 * lowerable_rvalues and the children will be lowered as part of lowering
100 * this node.
101 */
102 std::vector<ir_instruction *> lowerable_children;
103 };
104
105 find_lowerable_rvalues_visitor(struct set *result);
106
107 static void stack_enter(class ir_instruction *ir, void *data);
108 static void stack_leave(class ir_instruction *ir, void *data);
109
110 virtual ir_visitor_status visit(ir_constant *ir);
111 virtual ir_visitor_status visit(ir_dereference_variable *ir);
112
113 virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
114 virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
115 virtual ir_visitor_status visit_enter(ir_texture *ir);
116 virtual ir_visitor_status visit_enter(ir_expression *ir);
117
118 virtual ir_visitor_status visit_leave(ir_assignment *ir);
119 virtual ir_visitor_status visit_leave(ir_call *ir);
120
121 static can_lower_state handle_precision(const glsl_type *type,
122 int precision);
123
124 static parent_relation get_parent_relation(ir_instruction *parent,
125 ir_instruction *child);
126
127 std::vector<stack_entry> stack;
128 struct set *lowerable_rvalues;
129
130 void pop_stack_entry();
131 void add_lowerable_children(const stack_entry &entry);
132 };
133
134 class lower_precision_visitor : public ir_rvalue_visitor {
135 public:
136 virtual void handle_rvalue(ir_rvalue **rvalue);
137 virtual ir_visitor_status visit_enter(ir_dereference_array *);
138 virtual ir_visitor_status visit_enter(ir_dereference_record *);
139 virtual ir_visitor_status visit_enter(ir_call *ir);
140 virtual ir_visitor_status visit_enter(ir_texture *ir);
141 virtual ir_visitor_status visit_leave(ir_expression *);
142 };
143
144 bool
145 can_lower_type(const glsl_type *type)
146 {
147 /* Don’t lower any expressions involving non-float types except bool and
148 * texture samplers. This will rule out operations that change the type such
149 * as conversion to ints. Instead it will end up lowering the arguments
150 * instead and adding a final conversion to float32. We want to handle
151 * boolean types so that it will do comparisons as 16-bit.
152 */
153
154 switch (type->base_type) {
155 case GLSL_TYPE_FLOAT:
156 case GLSL_TYPE_BOOL:
157 case GLSL_TYPE_SAMPLER:
158 return true;
159
160 default:
161 return false;
162 }
163 }
164
165 find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res)
166 {
167 lowerable_rvalues = res;
168 callback_enter = stack_enter;
169 callback_leave = stack_leave;
170 data_enter = this;
171 data_leave = this;
172 }
173
174 void
175 find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
176 void *data)
177 {
178 find_lowerable_rvalues_visitor *state =
179 (find_lowerable_rvalues_visitor *) data;
180
181 /* Add a new stack entry for this instruction */
182 stack_entry entry;
183
184 entry.instr = ir;
185 entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;
186
187 state->stack.push_back(entry);
188 }
189
190 void
191 find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
192 {
193 /* We can’t lower this node so if there were any pending children then they
194 * are all root lowerable nodes and we should add them to the set.
195 */
196 for (auto &it : entry.lowerable_children)
197 _mesa_set_add(lowerable_rvalues, it);
198 }
199
200 void
201 find_lowerable_rvalues_visitor::pop_stack_entry()
202 {
203 const stack_entry &entry = stack.back();
204
205 if (stack.size() >= 2) {
206 /* Combine this state into the parent state, unless the parent operation
207 * doesn’t have any relation to the child operations
208 */
209 stack_entry &parent = stack.end()[-2];
210 parent_relation rel = get_parent_relation(parent.instr, entry.instr);
211
212 if (rel == COMBINED_OPERATION) {
213 switch (entry.state) {
214 case CANT_LOWER:
215 parent.state = CANT_LOWER;
216 break;
217 case SHOULD_LOWER:
218 if (parent.state == UNKNOWN)
219 parent.state = SHOULD_LOWER;
220 break;
221 case UNKNOWN:
222 break;
223 }
224 }
225 }
226
227 if (entry.state == SHOULD_LOWER) {
228 ir_rvalue *rv = entry.instr->as_rvalue();
229
230 if (rv == NULL) {
231 add_lowerable_children(entry);
232 } else if (stack.size() >= 2) {
233 stack_entry &parent = stack.end()[-2];
234
235 switch (get_parent_relation(parent.instr, rv)) {
236 case COMBINED_OPERATION:
237 /* We only want to add the toplevel lowerable instructions to the
238 * lowerable set. Therefore if there is a parent then instead of
239 * adding this instruction to the set we will queue depending on
240 * the result of the parent instruction.
241 */
242 parent.lowerable_children.push_back(entry.instr);
243 break;
244 case INDEPENDENT_OPERATION:
245 _mesa_set_add(lowerable_rvalues, rv);
246 break;
247 }
248 } else {
249 /* This is a toplevel node so add it directly to the lowerable
250 * set.
251 */
252 _mesa_set_add(lowerable_rvalues, rv);
253 }
254 } else if (entry.state == CANT_LOWER) {
255 add_lowerable_children(entry);
256 }
257
258 stack.pop_back();
259 }
260
261 void
262 find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
263 void *data)
264 {
265 find_lowerable_rvalues_visitor *state =
266 (find_lowerable_rvalues_visitor *) data;
267
268 state->pop_stack_entry();
269 }
270
271 enum find_lowerable_rvalues_visitor::can_lower_state
272 find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
273 int precision)
274 {
275 if (!can_lower_type(type))
276 return CANT_LOWER;
277
278 switch (precision) {
279 case GLSL_PRECISION_NONE:
280 return UNKNOWN;
281 case GLSL_PRECISION_HIGH:
282 return CANT_LOWER;
283 case GLSL_PRECISION_MEDIUM:
284 case GLSL_PRECISION_LOW:
285 return SHOULD_LOWER;
286 }
287
288 return CANT_LOWER;
289 }
290
291 enum find_lowerable_rvalues_visitor::parent_relation
292 find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
293 ir_instruction *child)
294 {
295 /* If the parent is a dereference instruction then the only child could be
296 * for example an array dereference and that should be lowered independently
297 * of the parent.
298 */
299 if (parent->as_dereference())
300 return INDEPENDENT_OPERATION;
301
302 /* The precision of texture sampling depend on the precision of the sampler.
303 * The rest of the arguments don’t matter so we can treat it as an
304 * independent operation.
305 */
306 if (parent->as_texture())
307 return INDEPENDENT_OPERATION;
308
309 return COMBINED_OPERATION;
310 }
311
312 ir_visitor_status
313 find_lowerable_rvalues_visitor::visit(ir_constant *ir)
314 {
315 stack_enter(ir, this);
316
317 if (!can_lower_type(ir->type))
318 stack.back().state = CANT_LOWER;
319
320 stack_leave(ir, this);
321
322 return visit_continue;
323 }
324
325 ir_visitor_status
326 find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
327 {
328 stack_enter(ir, this);
329
330 if (stack.back().state == UNKNOWN)
331 stack.back().state = handle_precision(ir->type, ir->precision());
332
333 stack_leave(ir, this);
334
335 return visit_continue;
336 }
337
338 ir_visitor_status
339 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
340 {
341 ir_hierarchical_visitor::visit_enter(ir);
342
343 if (stack.back().state == UNKNOWN)
344 stack.back().state = handle_precision(ir->type, ir->precision());
345
346 return visit_continue;
347 }
348
349 ir_visitor_status
350 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
351 {
352 ir_hierarchical_visitor::visit_enter(ir);
353
354 if (stack.back().state == UNKNOWN)
355 stack.back().state = handle_precision(ir->type, ir->precision());
356
357 return visit_continue;
358 }
359
360 ir_visitor_status
361 find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
362 {
363 ir_hierarchical_visitor::visit_enter(ir);
364
365 if (stack.back().state == UNKNOWN) {
366 /* The precision of the sample value depends on the precision of the
367 * sampler.
368 */
369 stack.back().state = handle_precision(ir->type,
370 ir->sampler->precision());
371 }
372
373 return visit_continue;
374 }
375
376 ir_visitor_status
377 find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
378 {
379 ir_hierarchical_visitor::visit_enter(ir);
380
381 if (!can_lower_type(ir->type))
382 stack.back().state = CANT_LOWER;
383
384 /* Don't lower precision for derivative calculations */
385 if (ir->operation == ir_unop_dFdx ||
386 ir->operation == ir_unop_dFdx_coarse ||
387 ir->operation == ir_unop_dFdx_fine ||
388 ir->operation == ir_unop_dFdy ||
389 ir->operation == ir_unop_dFdy_coarse ||
390 ir->operation == ir_unop_dFdy_fine) {
391 stack.back().state = CANT_LOWER;
392 }
393
394 return visit_continue;
395 }
396
397 static bool
398 is_lowerable_builtin(ir_call *ir,
399 const struct set *lowerable_rvalues)
400 {
401 if (!ir->callee->is_builtin())
402 return false;
403
404 assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
405
406 foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
407 if (!param->as_constant() &&
408 _mesa_set_search(lowerable_rvalues, param) == NULL)
409 return false;
410 }
411
412 return true;
413 }
414
415 ir_visitor_status
416 find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
417 {
418 ir_hierarchical_visitor::visit_leave(ir);
419
420 /* Special case for handling temporary variables generated by the compiler
421 * for function calls. If we assign to one of these using a function call
422 * that has a lowerable return type then we can assume the temporary
423 * variable should have a medium precision too.
424 */
425
426 /* Do nothing if the return type is void. */
427 if (!ir->return_deref)
428 return visit_continue;
429
430 ir_variable *var = ir->return_deref->variable_referenced();
431
432 assert(var->data.mode == ir_var_temporary);
433
434 unsigned return_precision = ir->callee->return_precision;
435
436 /* If the call is to a builtin, then the function won’t have a return
437 * precision and we should determine it from the precision of the arguments.
438 */
439 if (is_lowerable_builtin(ir, lowerable_rvalues))
440 return_precision = GLSL_PRECISION_MEDIUM;
441
442 can_lower_state lower_state =
443 handle_precision(var->type, return_precision);
444
445 if (lower_state == SHOULD_LOWER) {
446 /* There probably shouldn’t be any situations where multiple ir_call
447 * instructions write to the same temporary?
448 */
449 assert(var->data.precision == GLSL_PRECISION_NONE);
450 var->data.precision = GLSL_PRECISION_MEDIUM;
451 } else {
452 var->data.precision = GLSL_PRECISION_HIGH;
453 }
454
455 return visit_continue;
456 }
457
458 ir_visitor_status
459 find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
460 {
461 ir_hierarchical_visitor::visit_leave(ir);
462
463 /* Special case for handling temporary variables generated by the compiler.
464 * If we assign to one of these using a lowered precision then we can assume
465 * the temporary variable should have a medium precision too.
466 */
467 ir_variable *var = ir->lhs->variable_referenced();
468
469 if (var->data.mode == ir_var_temporary) {
470 if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
471 /* Only override the precision if this is the first assignment. For
472 * temporaries such as the ones generated for the ?: operator there
473 * can be multiple assignments with different precisions. This way we
474 * get the highest precision of all of the assignments.
475 */
476 if (var->data.precision == GLSL_PRECISION_NONE)
477 var->data.precision = GLSL_PRECISION_MEDIUM;
478 } else if (!ir->rhs->as_constant()) {
479 var->data.precision = GLSL_PRECISION_HIGH;
480 }
481 }
482
483 return visit_continue;
484 }
485
486 void
487 find_lowerable_rvalues(exec_list *instructions,
488 struct set *result)
489 {
490 find_lowerable_rvalues_visitor v(result);
491
492 visit_list_elements(&v, instructions);
493
494 assert(v.stack.empty());
495 }
496
497 static ir_rvalue *
498 convert_precision(int op, ir_rvalue *ir)
499 {
500 unsigned base_type = (op == ir_unop_f2fmp ?
501 GLSL_TYPE_FLOAT16 : GLSL_TYPE_FLOAT);
502 const glsl_type *desired_type;
503 desired_type = glsl_type::get_instance(base_type,
504 ir->type->vector_elements,
505 ir->type->matrix_columns);
506
507 void *mem_ctx = ralloc_parent(ir);
508 return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
509 }
510
511 void
512 lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
513 {
514 ir_rvalue *ir = *rvalue;
515
516 if (ir == NULL)
517 return;
518
519 if (ir->as_dereference()) {
520 if (!ir->type->is_boolean())
521 *rvalue = convert_precision(ir_unop_f2fmp, ir);
522 } else if (ir->type->is_float()) {
523 ir->type = glsl_type::get_instance(GLSL_TYPE_FLOAT16,
524 ir->type->vector_elements,
525 ir->type->matrix_columns,
526 ir->type->explicit_stride,
527 ir->type->interface_row_major);
528
529 ir_constant *const_ir = ir->as_constant();
530
531 if (const_ir) {
532 ir_constant_data value;
533
534 for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
535 value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
536
537 const_ir->value = value;
538 }
539 }
540 }
541
542 ir_visitor_status
543 lower_precision_visitor::visit_enter(ir_dereference_record *ir)
544 {
545 /* We don’t want to lower the variable */
546 return visit_continue_with_parent;
547 }
548
549 ir_visitor_status
550 lower_precision_visitor::visit_enter(ir_dereference_array *ir)
551 {
552 /* We don’t want to convert the array index or the variable. If the array
553 * index itself is lowerable that will be handled separately.
554 */
555 return visit_continue_with_parent;
556 }
557
558 ir_visitor_status
559 lower_precision_visitor::visit_enter(ir_call *ir)
560 {
561 /* We don’t want to convert the arguments. These will be handled separately.
562 */
563 return visit_continue_with_parent;
564 }
565
566 ir_visitor_status
567 lower_precision_visitor::visit_enter(ir_texture *ir)
568 {
569 /* We don’t want to convert the arguments. These will be handled separately.
570 */
571 return visit_continue_with_parent;
572 }
573
574 ir_visitor_status
575 lower_precision_visitor::visit_leave(ir_expression *ir)
576 {
577 ir_rvalue_visitor::visit_leave(ir);
578
579 /* If the expression is a conversion operation to or from bool then fix the
580 * operation.
581 */
582 switch (ir->operation) {
583 case ir_unop_b2f:
584 ir->operation = ir_unop_b2f16;
585 break;
586 case ir_unop_f2b:
587 ir->operation = ir_unop_f162b;
588 break;
589 default:
590 break;
591 }
592
593 return visit_continue;
594 }
595
596 void
597 find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
598 {
599 /* Checking the precision of rvalue can be lowered first throughout
600 * find_lowerable_rvalues_visitor.
601 * Once it found the precision of rvalue can be lowered, then we can
602 * add conversion f2fmp through lower_precision_visitor.
603 */
604 if (*rvalue == NULL)
605 return;
606
607 struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);
608
609 if (!entry)
610 return;
611
612 _mesa_set_remove(lowerable_rvalues, entry);
613
614 /* If the entire expression is just a variable dereference then trying to
615 * lower it will just directly add pointless to and from conversions without
616 * any actual operation in-between. Although these will eventually get
617 * optimised out, avoiding generating them here also avoids breaking inout
618 * parameters to functions.
619 */
620 if ((*rvalue)->as_dereference())
621 return;
622
623 lower_precision_visitor v;
624
625 (*rvalue)->accept(&v);
626 v.handle_rvalue(rvalue);
627
628 /* We don’t need to add the final conversion if the final type has been
629 * converted to bool
630 */
631 if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL)
632 *rvalue = convert_precision(ir_unop_f162f, *rvalue);
633
634 progress = true;
635 }
636
637 ir_visitor_status
638 find_precision_visitor::visit_enter(ir_call *ir)
639 {
640 ir_rvalue_enter_visitor::visit_enter(ir);
641
642 /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
643 * overrode the precision of the temporary return variable, then we can
644 * replace the builtin implementation with a lowered version.
645 */
646
647 if (!ir->callee->is_builtin() ||
648 ir->return_deref == NULL ||
649 ir->return_deref->variable_referenced()->data.precision !=
650 GLSL_PRECISION_MEDIUM)
651 return visit_continue;
652
653 ir->callee = map_builtin(ir->callee);
654 ir->generate_inline(ir);
655 ir->remove();
656
657 return visit_continue_with_parent;
658 }
659
660 ir_function_signature *
661 find_precision_visitor::map_builtin(ir_function_signature *sig)
662 {
663 if (lowered_builtins == NULL) {
664 lowered_builtins = _mesa_pointer_hash_table_create(NULL);
665 clone_ht =_mesa_pointer_hash_table_create(NULL);
666 lowered_builtin_mem_ctx = ralloc_context(NULL);
667 } else {
668 struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
669 if (entry)
670 return (ir_function_signature *) entry->data;
671 }
672
673 ir_function_signature *lowered_sig =
674 sig->clone(lowered_builtin_mem_ctx, clone_ht);
675
676 foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
677 param->data.precision = GLSL_PRECISION_MEDIUM;
678 }
679
680 lower_precision(&lowered_sig->body);
681
682 _mesa_hash_table_clear(clone_ht, NULL);
683
684 _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);
685
686 return lowered_sig;
687 }
688
689 find_precision_visitor::find_precision_visitor()
690 : progress(false),
691 lowerable_rvalues(_mesa_pointer_set_create(NULL)),
692 lowered_builtins(NULL),
693 clone_ht(NULL),
694 lowered_builtin_mem_ctx(NULL)
695 {
696 }
697
698 find_precision_visitor::~find_precision_visitor()
699 {
700 _mesa_set_destroy(lowerable_rvalues, NULL);
701
702 if (lowered_builtins) {
703 _mesa_hash_table_destroy(lowered_builtins, NULL);
704 _mesa_hash_table_destroy(clone_ht, NULL);
705 ralloc_free(lowered_builtin_mem_ctx);
706 }
707 }
708
709 }
710
711 bool
712 lower_precision(exec_list *instructions)
713 {
714 find_precision_visitor v;
715
716 find_lowerable_rvalues(instructions, v.lowerable_rvalues);
717
718 visit_list_elements(&v, instructions);
719
720 return v.progress;
721 }