glsl: lower the precision of imageLoad
[mesa.git] / src / compiler / glsl / lower_precision.cpp
/*
 * Copyright © 2019 Google, Inc
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_precision.cpp
 */
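
/* High-level overview (a summary of the pass, not verbatim upstream
 * documentation): find_lowerable_rvalues_visitor walks the IR and collects
 * the root rvalues whose precision qualifiers (mediump/lowp) allow them to
 * be evaluated in 16 bits.  find_precision_visitor then rewrites each
 * collected rvalue with lower_precision_visitor, which retypes the
 * expression tree to float16/int16/uint16, and finally wraps the result in
 * a conversion back to the original 32-bit type.  For example (hypothetical
 * shader code):
 *
 *    mediump float a, b, c;
 *    ... a * b + c ...
 *
 * is turned into roughly
 *
 *    f162f(f2fmp(a) * f2fmp(b) + f2fmp(c))
 *
 * so the multiply-add itself happens at reduced precision.
 */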

#include "main/macros.h"
#include "main/mtypes.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"
#include "util/half_float.h"
#include "util/set.h"
#include "util/hash_table.h"
#include <vector>

namespace {

class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
   find_precision_visitor(const struct gl_shader_compiler_options *options);
   ~find_precision_visitor();

   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_call *ir);

   ir_function_signature *map_builtin(ir_function_signature *sig);

   bool progress;

   /* Set of rvalues that can be lowered. This will be filled in by
    * find_lowerable_rvalues_visitor. Only the root node of a lowerable
    * section will be added to this set.
    */
   struct set *lowerable_rvalues;

   /**
    * A mapping of builtin signature functions to lowered versions. This is
    * filled in lazily when a lowered version is needed.
    */
   struct hash_table *lowered_builtins;
   /**
    * A temporary hash table only used in order to clone functions.
    */
   struct hash_table *clone_ht;

   void *lowered_builtin_mem_ctx;

   const struct gl_shader_compiler_options *options;
};

class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public:
   enum can_lower_state {
      UNKNOWN,
      CANT_LOWER,
      SHOULD_LOWER,
   };

   enum parent_relation {
      /* The parent performs a further operation involving the result from
       * the child and can be lowered along with it.
       */
      COMBINED_OPERATION,
      /* The parent instruction’s operation is independent of the child type
       * so the child should be lowered separately.
       */
      INDEPENDENT_OPERATION,
   };
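
   /* Illustration (hypothetical examples): in "a + b" the addition is a
    * COMBINED_OPERATION with respect to its operands, so the operands and
    * the add are lowered together.  In "texture(s, coord)" or "arr[i]" the
    * resulting value does not depend on the precision of coord or i, so
    * those children are INDEPENDENT_OPERATIONs and get lowered on their
    * own.
    */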

   struct stack_entry {
      ir_instruction *instr;
      enum can_lower_state state;
      /* List of child rvalues that can be lowered. When this stack entry is
       * popped, if this node itself can’t be lowered then all of the
       * children are root nodes to lower, so we will add them to
       * lowerable_rvalues. Otherwise, if this node can also be lowered, we
       * won’t add the children, because we only want to add the topmost
       * lowerable nodes to lowerable_rvalues and the children will be
       * lowered as part of lowering this node.
       */
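      /* For instance (hypothetical shader code, not from the source tree):
       * given "mediump float r = (a * b) + c;" the whole rvalue
       * "(a * b) + c" is the root node that ends up in lowerable_rvalues,
       * while "a * b" only ever appears in its parent’s lowerable_children
       * list and is lowered as part of lowering the addition.
       */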
      std::vector<ir_instruction *> lowerable_children;
   };

   find_lowerable_rvalues_visitor(struct set *result,
                                  const struct gl_shader_compiler_options *options);
   bool can_lower_type(const glsl_type *type) const;

   static void stack_enter(class ir_instruction *ir, void *data);
   static void stack_leave(class ir_instruction *ir, void *data);

   virtual ir_visitor_status visit(ir_constant *ir);
   virtual ir_visitor_status visit(ir_dereference_variable *ir);

   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_enter(ir_expression *ir);

   virtual ir_visitor_status visit_leave(ir_assignment *ir);
   virtual ir_visitor_status visit_leave(ir_call *ir);

   can_lower_state handle_precision(const glsl_type *type,
                                    int precision) const;

   static parent_relation get_parent_relation(ir_instruction *parent,
                                              ir_instruction *child);

   std::vector<stack_entry> stack;
   struct set *lowerable_rvalues;
   const struct gl_shader_compiler_options *options;

   void pop_stack_entry();
   void add_lowerable_children(const stack_entry &entry);
};

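/* lower_precision_visitor does the actual rewrite of a single lowerable
 * rvalue tree: variable dereferences are wrapped in down-conversions,
 * intermediate expression and constant types are switched to their 16-bit
 * equivalents, and bool conversion opcodes are fixed up when leaving
 * expressions.
 */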
class lower_precision_visitor : public ir_rvalue_visitor {
public:
   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_dereference_array *);
   virtual ir_visitor_status visit_enter(ir_dereference_record *);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_leave(ir_expression *);
};

bool
find_lowerable_rvalues_visitor::can_lower_type(const glsl_type *type) const
{
   /* Don’t lower any expressions involving non-float types except bool and
    * texture samplers. This rules out operations that change the type, such
    * as conversions to int: for those we end up lowering the arguments
    * instead and adding a final conversion to float32. We do handle boolean
    * types so that comparisons are done at 16 bits.
    */
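   /* Example of the bool case (hypothetical): with "mediump float a, b" the
    * comparison "a < b" has a bool result, which can_lower_type() accepts,
    * so the whole comparison is lowered and evaluated on float16 operands
    * rather than being forced back to float32 first.
    */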

   switch (type->base_type) {
   /* TODO: should we do anything for these types with regard to Int16 vs
    * FP16 support?
    */
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
      return true;

   case GLSL_TYPE_FLOAT:
      return options->LowerPrecisionFloat16;

   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
      return options->LowerPrecisionInt16;

   default:
      return false;
   }
}

find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
                                                               const struct gl_shader_compiler_options *opts)
{
   lowerable_rvalues = res;
   options = opts;
   callback_enter = stack_enter;
   callback_leave = stack_leave;
   data_enter = this;
   data_leave = this;
}

void
find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   /* Add a new stack entry for this instruction */
   stack_entry entry;

   entry.instr = ir;
   entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;

   state->stack.push_back(entry);
}

void
find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
{
   /* We can’t lower this node so if there were any pending children then
    * they are all root lowerable nodes and we should add them to the set.
    */
   for (auto &it : entry.lowerable_children)
      _mesa_set_add(lowerable_rvalues, it);
}

void
find_lowerable_rvalues_visitor::pop_stack_entry()
{
   const stack_entry &entry = stack.back();

   if (stack.size() >= 2) {
      /* Combine this state into the parent state, unless the parent
       * operation doesn’t have any relation to the child operations.
       */
      stack_entry &parent = stack.end()[-2];
      parent_relation rel = get_parent_relation(parent.instr, entry.instr);

      if (rel == COMBINED_OPERATION) {
         switch (entry.state) {
         case CANT_LOWER:
            parent.state = CANT_LOWER;
            break;
         case SHOULD_LOWER:
            if (parent.state == UNKNOWN)
               parent.state = SHOULD_LOWER;
            break;
         case UNKNOWN:
            break;
         }
      }
   }

   if (entry.state == SHOULD_LOWER) {
      ir_rvalue *rv = entry.instr->as_rvalue();

      if (rv == NULL) {
         add_lowerable_children(entry);
      } else if (stack.size() >= 2) {
         stack_entry &parent = stack.end()[-2];

         switch (get_parent_relation(parent.instr, rv)) {
         case COMBINED_OPERATION:
            /* We only want to add the toplevel lowerable instructions to
             * the lowerable set. Therefore, if there is a parent, instead
             * of adding this instruction to the set we queue it on the
             * parent, so whether it gets added depends on the outcome for
             * the parent instruction.
             */
            parent.lowerable_children.push_back(entry.instr);
            break;
         case INDEPENDENT_OPERATION:
            _mesa_set_add(lowerable_rvalues, rv);
            break;
         }
      } else {
         /* This is a toplevel node so add it directly to the lowerable
          * set.
          */
         _mesa_set_add(lowerable_rvalues, rv);
      }
   } else if (entry.state == CANT_LOWER) {
      add_lowerable_children(entry);
   }

   stack.pop_back();
}

void
find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   state->pop_stack_entry();
}

enum find_lowerable_rvalues_visitor::can_lower_state
find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
                                                 int precision) const
{
   if (!can_lower_type(type))
      return CANT_LOWER;

   switch (precision) {
   case GLSL_PRECISION_NONE:
      return UNKNOWN;
   case GLSL_PRECISION_HIGH:
      return CANT_LOWER;
   case GLSL_PRECISION_MEDIUM:
   case GLSL_PRECISION_LOW:
      return SHOULD_LOWER;
   }

   return CANT_LOWER;
}

enum find_lowerable_rvalues_visitor::parent_relation
find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
                                                    ir_instruction *child)
{
   /* If the parent is a dereference instruction then the only child could
    * be, for example, an array dereference and that should be lowered
    * independently of the parent.
    */
   if (parent->as_dereference())
      return INDEPENDENT_OPERATION;

   /* The precision of texture sampling depends on the precision of the
    * sampler. The rest of the arguments don’t matter, so we can treat it as
    * an independent operation.
    */
   if (parent->as_texture())
      return INDEPENDENT_OPERATION;

   return COMBINED_OPERATION;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_constant *ir)
{
   stack_enter(ir, this);

   if (!can_lower_type(ir->type))
      stack.back().state = CANT_LOWER;

   stack_leave(ir, this);

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
{
   stack_enter(ir, this);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   stack_leave(ir, this);

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN) {
      /* The precision of the sample value depends on the precision of the
       * sampler.
       */
      stack.back().state = handle_precision(ir->type,
                                            ir->sampler->precision());
   }

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (!can_lower_type(ir->type))
      stack.back().state = CANT_LOWER;

   /* Don't lower precision for derivative calculations */
   if (!options->LowerPrecisionDerivatives &&
       (ir->operation == ir_unop_dFdx ||
        ir->operation == ir_unop_dFdx_coarse ||
        ir->operation == ir_unop_dFdx_fine ||
        ir->operation == ir_unop_dFdy ||
        ir->operation == ir_unop_dFdy_coarse ||
        ir->operation == ir_unop_dFdy_fine)) {
      stack.back().state = CANT_LOWER;
   }

   return visit_continue;
}

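/* Decide whether a builtin call’s result can be treated as mediump.  This is
 * used from visit_leave(ir_call) below: when it returns true, the
 * compiler-generated temporary receiving the return value is given mediump
 * precision, as if the callee had declared a mediump return type.
 */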
static bool
is_lowerable_builtin(ir_call *ir,
                     const struct set *lowerable_rvalues)
{
   /* The intrinsic call is inside the wrapper imageLoad function that will
    * be inlined. We have to handle both of them.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
       (ir->callee->is_builtin() &&
        !strcmp(ir->callee_name(), "imageLoad"))) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *resource = param->variable_referenced();

      assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
      assert(resource->type->without_array()->is_image());

      /* GLSL ES 3.20 requires that images have a precision modifier, but if
       * you set one, it doesn't do anything, because all intrinsics are
       * defined with highp. This seems to be a spec bug.
       *
       * In theory we could set the return value to mediump if the image
       * format has a lower precision. This appears to be the most sensible
       * thing to do.
       */
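      /* For example (illustrative): rgba8 (8-bit unorm) and rgba16f loads
       * pass the check below and can be treated as mediump, while r32f or
       * rgba32i loads cannot, and rgba16 (16-bit unorm) also stays highp,
       * presumably because a float16 cannot represent every 16-bit unorm
       * value exactly.
       */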
      const struct util_format_description *desc =
         util_format_description(resource->data.image_format);
      unsigned i =
         util_format_get_first_non_void_channel(resource->data.image_format);

      if (desc->channel[i].pure_integer ||
          desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
         return desc->channel[i].size <= 16;
      else
         return desc->channel[i].size <= 10; /* unorm/snorm */
   }

   if (!ir->callee->is_builtin())
      return false;

   assert(ir->callee->return_precision == GLSL_PRECISION_NONE);

   foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
      if (!param->as_constant() &&
          _mesa_set_search(lowerable_rvalues, param) == NULL)
         return false;
   }

   return true;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the
    * compiler for function calls. If we assign to one of these using a
    * function call that has a lowerable return type then we can assume the
    * temporary variable should have a medium precision too.
    */

   /* Do nothing if the return type is void. */
   if (!ir->return_deref)
      return visit_continue;

   ir_variable *var = ir->return_deref->variable_referenced();

   assert(var->data.mode == ir_var_temporary);

   unsigned return_precision = ir->callee->return_precision;

   /* If the call is to a builtin, then the function won’t have a return
    * precision and we should determine it from the precision of the
    * arguments.
    */
   if (is_lowerable_builtin(ir, lowerable_rvalues))
      return_precision = GLSL_PRECISION_MEDIUM;

   can_lower_state lower_state =
      handle_precision(var->type, return_precision);

   if (lower_state == SHOULD_LOWER) {
      /* There probably shouldn’t be any situations where multiple ir_call
       * instructions write to the same temporary?
       */
      assert(var->data.precision == GLSL_PRECISION_NONE);
      var->data.precision = GLSL_PRECISION_MEDIUM;
   } else {
      var->data.precision = GLSL_PRECISION_HIGH;
   }

   return visit_continue;
}

ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the
    * compiler. If we assign to one of these using a lowered precision then
    * we can assume the temporary variable should have a medium precision
    * too.
    */
   ir_variable *var = ir->lhs->variable_referenced();

   if (var->data.mode == ir_var_temporary) {
      if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
         /* Only override the precision if this is the first assignment. For
          * temporaries such as the ones generated for the ?: operator there
          * can be multiple assignments with different precisions. This way
          * we get the highest precision of all of the assignments.
          */
         if (var->data.precision == GLSL_PRECISION_NONE)
            var->data.precision = GLSL_PRECISION_MEDIUM;
      } else if (!ir->rhs->as_constant()) {
         var->data.precision = GLSL_PRECISION_HIGH;
      }
   }

   return visit_continue;
}

void
find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
                       exec_list *instructions,
                       struct set *result)
{
   find_lowerable_rvalues_visitor v(result, options);

   visit_list_elements(&v, instructions);

   assert(v.stack.empty());
}

static ir_rvalue *
convert_precision(glsl_base_type type, bool up, ir_rvalue *ir)
{
   unsigned new_type, op;

   if (up) {
      switch (type) {
      case GLSL_TYPE_FLOAT16:
         new_type = GLSL_TYPE_FLOAT;
         op = ir_unop_f162f;
         break;
      case GLSL_TYPE_INT16:
         new_type = GLSL_TYPE_INT;
         op = ir_unop_i2i;
         break;
      case GLSL_TYPE_UINT16:
         new_type = GLSL_TYPE_UINT;
         op = ir_unop_u2u;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   } else {
      switch (type) {
      case GLSL_TYPE_FLOAT:
         new_type = GLSL_TYPE_FLOAT16;
         op = ir_unop_f2fmp;
         break;
      case GLSL_TYPE_INT:
         new_type = GLSL_TYPE_INT16;
         op = ir_unop_i2imp;
         break;
      case GLSL_TYPE_UINT:
         new_type = GLSL_TYPE_UINT16;
         op = ir_unop_u2ump;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   }

   const glsl_type *desired_type;
   desired_type = glsl_type::get_instance(new_type,
                                          ir->type->vector_elements,
                                          ir->type->matrix_columns);

   void *mem_ctx = ralloc_parent(ir);
   return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
}
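
/* A call such as convert_precision(GLSL_TYPE_FLOAT, false, rv) therefore
 * wraps rv in an f2fmp expression whose type is a float16 vector/matrix of
 * the same shape, while passing up = true performs the matching conversion
 * back to the 32-bit type.
 */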

static glsl_base_type
lower_type(glsl_base_type type)
{
   switch (type) {
   case GLSL_TYPE_FLOAT:
      return GLSL_TYPE_FLOAT16;
   case GLSL_TYPE_INT:
      return GLSL_TYPE_INT16;
   case GLSL_TYPE_UINT:
      return GLSL_TYPE_UINT16;
   default:
      unreachable("invalid type");
      return GLSL_TYPE_ERROR;
   }
}

void
lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (ir == NULL)
      return;

   if (ir->as_dereference()) {
      if (!ir->type->is_boolean())
         *rvalue = convert_precision(ir->type->base_type, false, ir);
   } else if (ir->type->base_type == GLSL_TYPE_FLOAT ||
              ir->type->base_type == GLSL_TYPE_INT ||
              ir->type->base_type == GLSL_TYPE_UINT) {
      ir->type = glsl_type::get_instance(lower_type(ir->type->base_type),
                                         ir->type->vector_elements,
                                         ir->type->matrix_columns,
                                         ir->type->explicit_stride,
                                         ir->type->interface_row_major);

      ir_constant *const_ir = ir->as_constant();

      if (const_ir) {
         ir_constant_data value;

         if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
               value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
         } else if (ir->type->base_type == GLSL_TYPE_INT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
               value.i16[i] = const_ir->value.i[i];
         } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
               value.u16[i] = const_ir->value.u[i];
         } else {
            unreachable("invalid type");
         }

         const_ir->value = value;
      }
   }
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_record *ir)
{
   /* We don’t want to lower the variable */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_array *ir)
{
   /* We don’t want to convert the array index or the variable. If the array
    * index itself is lowerable that will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_call *ir)
{
   /* We don’t want to convert the arguments. These will be handled
    * separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_texture *ir)
{
   /* We don’t want to convert the arguments. These will be handled
    * separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_leave(ir_expression *ir)
{
   ir_rvalue_visitor::visit_leave(ir);

   /* If the expression is a conversion operation to or from bool then fix
    * the operation.
    */
   switch (ir->operation) {
   case ir_unop_b2f:
      ir->operation = ir_unop_b2f16;
      break;
   case ir_unop_f2b:
      ir->operation = ir_unop_f162b;
      break;
   case ir_unop_b2i:
   case ir_unop_i2b:
      /* Nothing to do - they both support int16. */
      break;
   default:
      break;
   }

   return visit_continue;
}

void
find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   /* find_lowerable_rvalues_visitor has already determined which rvalues
    * can have their precision lowered. For each rvalue in that set we run
    * lower_precision_visitor, which inserts the conversions (f2fmp, etc.)
    * and rewrites the types.
    */
   if (*rvalue == NULL)
      return;

   struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);

   if (!entry)
      return;

   _mesa_set_remove(lowerable_rvalues, entry);

   /* If the entire expression is just a variable dereference then trying to
    * lower it would just directly add pointless to-and-from conversions
    * without any actual operation in-between. Although these would
    * eventually get optimised out, avoiding generating them here also
    * avoids breaking inout parameters to functions.
    */
   if ((*rvalue)->as_dereference())
      return;

   lower_precision_visitor v;

   (*rvalue)->accept(&v);
   v.handle_rvalue(rvalue);

   /* We don’t need to add the final conversion if the final type has been
    * converted to bool.
    */
   if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL)
      *rvalue = convert_precision((*rvalue)->type->base_type, true, *rvalue);

   progress = true;
}

ir_visitor_status
find_precision_visitor::visit_enter(ir_call *ir)
{
   ir_rvalue_enter_visitor::visit_enter(ir);

   ir_variable *return_var =
      ir->return_deref ? ir->return_deref->variable_referenced() : NULL;

   /* Don't do anything for image_load here. We have only changed the return
    * value to mediump/lowp, so that following instructions can use reduced
    * precision.
    *
    * The return value type of the intrinsic itself isn't changed here, but
    * can be changed in NIR if all users use the *2*mp opcode.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
      return visit_continue;

   /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
    * overrode the precision of the temporary return variable, then we can
    * replace the builtin implementation with a lowered version.
    */

   if (!ir->callee->is_builtin() ||
       return_var == NULL ||
       (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
        return_var->data.precision != GLSL_PRECISION_LOW))
      return visit_continue;

   ir->callee = map_builtin(ir->callee);
   ir->generate_inline(ir);
   ir->remove();

   return visit_continue_with_parent;
}

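/* Return (and cache) a clone of the given builtin signature whose parameters
 * are forced to mediump and whose body has itself been run through
 * lower_precision(), so that inlined builtins also execute at reduced
 * precision.
 */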
ir_function_signature *
find_precision_visitor::map_builtin(ir_function_signature *sig)
{
   if (lowered_builtins == NULL) {
      lowered_builtins = _mesa_pointer_hash_table_create(NULL);
      clone_ht = _mesa_pointer_hash_table_create(NULL);
      lowered_builtin_mem_ctx = ralloc_context(NULL);
   } else {
      struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
      if (entry)
         return (ir_function_signature *) entry->data;
   }

   ir_function_signature *lowered_sig =
      sig->clone(lowered_builtin_mem_ctx, clone_ht);

   foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
      param->data.precision = GLSL_PRECISION_MEDIUM;
   }

   lower_precision(options, &lowered_sig->body);

   _mesa_hash_table_clear(clone_ht, NULL);

   _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);

   return lowered_sig;
}

find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
   : progress(false),
     lowerable_rvalues(_mesa_pointer_set_create(NULL)),
     lowered_builtins(NULL),
     clone_ht(NULL),
     lowered_builtin_mem_ctx(NULL),
     options(options)
{
}

find_precision_visitor::~find_precision_visitor()
{
   _mesa_set_destroy(lowerable_rvalues, NULL);

   if (lowered_builtins) {
      _mesa_hash_table_destroy(lowered_builtins, NULL);
      _mesa_hash_table_destroy(clone_ht, NULL);
      ralloc_free(lowered_builtin_mem_ctx);
   }
}

}

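/* Entry point for the pass: gather the lowerable rvalues for the given
 * instruction list and then rewrite them in place. Returns true if any
 * rvalue was lowered.
 */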
bool
lower_precision(const struct gl_shader_compiler_options *options,
                exec_list *instructions)
{
   find_precision_visitor v(options);

   find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);

   visit_list_elements(&v, instructions);

   return v.progress;
}