src/compiler/glsl/lower_precision.cpp

   1 /*
   2  * Copyright © 2019 Google, Inc
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file lower_precision.cpp
  26  */
  27
  28 #include "main/macros.h"
  29 #include "main/mtypes.h"
  30 #include "compiler/glsl_types.h"
  31 #include "ir.h"
  32 #include "ir_builder.h"
  33 #include "ir_optimization.h"
  34 #include "ir_rvalue_visitor.h"
  35 #include "util/half_float.h"
  36 #include "util/set.h"
  37 #include "util/hash_table.h"
  38 #include <vector>
  39
  40 namespace {
  41
/**
 * Visitor that applies the lowering decisions.
 *
 * handle_rvalue rewrites every rvalue found in lowerable_rvalues to operate
 * on 16-bit types, and visit_enter(ir_call) swaps calls to builtins whose
 * return temporary was marked mediump/lowp for a lowered clone of the
 * builtin obtained from map_builtin.
 */
class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
   find_precision_visitor(const struct gl_shader_compiler_options *options);
   ~find_precision_visitor();

   /* Lowers *rvalue if it is a member of lowerable_rvalues. */
   virtual void handle_rvalue(ir_rvalue **rvalue);
   /* Replaces lowerable builtin calls with an inlined lowered version. */
   virtual ir_visitor_status visit_enter(ir_call *ir);

   /* Returns the lowered clone of a builtin signature, creating and caching
    * it in lowered_builtins on first use.
    */
   ir_function_signature *map_builtin(ir_function_signature *sig);

   /* Set of rvalues that can be lowered. This will be filled in by
    * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
    * will be added to this set.
    */
   struct set *lowerable_rvalues;

   /**
    * A mapping of builtin signature functions to lowered versions. This is
    * filled in lazily when a lowered version is needed.
    */
   struct hash_table *lowered_builtins;
   /**
    * A temporary hash table only used in order to clone functions.
    */
   struct hash_table *clone_ht;

   /* ralloc context that the lowered builtin clones are allocated from; it
    * is created together with lowered_builtins in map_builtin.
    */
   void *lowered_builtin_mem_ctx;

   const struct gl_shader_compiler_options *options;
};
  72
/**
 * Hierarchical visitor that decides for each visited instruction whether it
 * can be lowered and records the topmost lowerable rvalues in
 * lowerable_rvalues.
 *
 * A stack of stack_entry tracks the instructions currently being visited:
 * stack_enter pushes an entry and pop_stack_entry merges the entry's verdict
 * into its parent before removing it.
 */
class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public:
   /* Lowering verdict for a single instruction on the stack. */
   enum can_lower_state {
      /* No verdict has been reached yet. */
      UNKNOWN,
      /* The instruction must keep its precision. */
      CANT_LOWER,
      /* The instruction should be lowered. */
      SHOULD_LOWER,
   };

   enum parent_relation {
      /* The parent performs a further operation involving the result from the
       * child and can be lowered along with it.
       */
      COMBINED_OPERATION,
      /* The parent instruction's operation is independent of the child type so
       * the child should be lowered separately.
       */
      INDEPENDENT_OPERATION,
   };

   struct stack_entry {
      /* Instruction this entry was pushed for. */
      ir_instruction *instr;
      /* Current verdict for instr; may be updated while children are popped. */
      enum can_lower_state state;
      /* List of child rvalues that can be lowered. When this stack entry is
       * popped, if this node itself can't be lowered then all of the children
       * are root nodes to lower so we will add them to lowerable_rvalues.
       * Otherwise if this node can also be lowered then we won't add the
       * children because we only want to add the topmost lowerable nodes to
       * lowerable_rvalues and the children will be lowered as part of lowering
       * this node.
       */
      std::vector<ir_instruction *> lowerable_children;
   };

   /* Stores the result set and options and registers stack_enter/stack_leave
    * as the visitor's enter/leave callbacks.
    */
   find_lowerable_rvalues_visitor(struct set *result,
                                  const struct gl_shader_compiler_options *options);

   /* Enter/leave callbacks; data is the find_lowerable_rvalues_visitor. */
   static void stack_enter(class ir_instruction *ir, void *data);
   static void stack_leave(class ir_instruction *ir, void *data);

   /* Leaf nodes: these push and pop their own stack entry. */
   virtual ir_visitor_status visit(ir_constant *ir);
   virtual ir_visitor_status visit(ir_dereference_variable *ir);

   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_enter(ir_expression *ir);

   /* These assign a precision to compiler-generated temporaries. */
   virtual ir_visitor_status visit_leave(ir_assignment *ir);
   virtual ir_visitor_status visit_leave(ir_call *ir);

   /* Maps a type and a GLSL_PRECISION_* value to a lowering verdict. */
   can_lower_state handle_precision(const glsl_type *type,
                                    int precision) const;

   /* Tells whether a child's verdict should be combined with its parent's. */
   static parent_relation get_parent_relation(ir_instruction *parent,
                                              ir_instruction *child);

   /* Entries for the instructions currently being visited, innermost last. */
   std::vector<stack_entry> stack;
   /* Output set receiving the topmost lowerable rvalues. */
   struct set *lowerable_rvalues;
   const struct gl_shader_compiler_options *options;

   /* Resolves and removes the innermost stack entry. */
   void pop_stack_entry();
   /* Adds every pending lowerable child of entry to lowerable_rvalues. */
   void add_lowerable_children(const stack_entry &entry);
};
 136
/**
 * Visitor that rewrites one lowerable expression tree in place.
 *
 * handle_rvalue narrows 32-bit types and constants to 16 bits and inserts
 * down-conversions on dereferences. The visit_enter overrides return
 * visit_continue_with_parent for dereferences, calls and textures, and
 * visit_leave(ir_expression) fixes up bool <-> float conversion opcodes.
 */
class lower_precision_visitor : public ir_rvalue_visitor {
public:
   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_dereference_array *);
   virtual ir_visitor_status visit_enter(ir_dereference_record *);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_leave(ir_expression *);
};
 146
 147 static bool
 148 can_lower_type(const struct gl_shader_compiler_options *options,
 149                const glsl_type *type)
 150 {
 151    /* Don’t lower any expressions involving non-float types except bool and
 152     * texture samplers. This will rule out operations that change the type such
 153     * as conversion to ints. Instead it will end up lowering the arguments
 154     * instead and adding a final conversion to float32. We want to handle
 155     * boolean types so that it will do comparisons as 16-bit.
 156     */
 157
 158    switch (type->without_array()->base_type) {
 159    /* TODO: should we do anything for these two with regard to Int16 vs FP16
 160     * support?
 161     */
 162    case GLSL_TYPE_BOOL:
 163    case GLSL_TYPE_SAMPLER:
 164    case GLSL_TYPE_IMAGE:
 165       return true;
 166
 167    case GLSL_TYPE_FLOAT:
 168       return options->LowerPrecisionFloat16;
 169
 170    case GLSL_TYPE_UINT:
 171    case GLSL_TYPE_INT:
 172       return options->LowerPrecisionInt16;
 173
 174    default:
 175       return false;
 176    }
 177 }
 178
 179 find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
 180                                  const struct gl_shader_compiler_options *opts)
 181 {
 182    lowerable_rvalues = res;
 183    options = opts;
 184    callback_enter = stack_enter;
 185    callback_leave = stack_leave;
 186    data_enter = this;
 187    data_leave = this;
 188 }
 189
 190 void
 191 find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
 192                                             void *data)
 193 {
 194    find_lowerable_rvalues_visitor *state =
 195       (find_lowerable_rvalues_visitor *) data;
 196
 197    /* Add a new stack entry for this instruction */
 198    stack_entry entry;
 199
 200    entry.instr = ir;
 201    entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;
 202
 203    state->stack.push_back(entry);
 204 }
 205
 206 void
 207 find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
 208 {
 209    /* We can’t lower this node so if there were any pending children then they
 210     * are all root lowerable nodes and we should add them to the set.
 211     */
 212    for (auto &it : entry.lowerable_children)
 213       _mesa_set_add(lowerable_rvalues, it);
 214 }
 215
 216 void
 217 find_lowerable_rvalues_visitor::pop_stack_entry()
 218 {
 219    const stack_entry &entry = stack.back();
 220
 221    if (stack.size() >= 2) {
 222       /* Combine this state into the parent state, unless the parent operation
 223        * doesn’t have any relation to the child operations
 224        */
 225       stack_entry &parent = stack.end()[-2];
 226       parent_relation rel = get_parent_relation(parent.instr, entry.instr);
 227
 228       if (rel == COMBINED_OPERATION) {
 229          switch (entry.state) {
 230          case CANT_LOWER:
 231             parent.state = CANT_LOWER;
 232             break;
 233          case SHOULD_LOWER:
 234             if (parent.state == UNKNOWN)
 235                parent.state = SHOULD_LOWER;
 236             break;
 237          case UNKNOWN:
 238             break;
 239          }
 240       }
 241    }
 242
 243    if (entry.state == SHOULD_LOWER) {
 244       ir_rvalue *rv = entry.instr->as_rvalue();
 245
 246       if (rv == NULL) {
 247          add_lowerable_children(entry);
 248       } else if (stack.size() >= 2) {
 249          stack_entry &parent = stack.end()[-2];
 250
 251          switch (get_parent_relation(parent.instr, rv)) {
 252          case COMBINED_OPERATION:
 253             /* We only want to add the toplevel lowerable instructions to the
 254              * lowerable set. Therefore if there is a parent then instead of
 255              * adding this instruction to the set we will queue depending on
 256              * the result of the parent instruction.
 257              */
 258             parent.lowerable_children.push_back(entry.instr);
 259             break;
 260          case INDEPENDENT_OPERATION:
 261             _mesa_set_add(lowerable_rvalues, rv);
 262             break;
 263          }
 264       } else {
 265          /* This is a toplevel node so add it directly to the lowerable
 266           * set.
 267           */
 268          _mesa_set_add(lowerable_rvalues, rv);
 269       }
 270    } else if (entry.state == CANT_LOWER) {
 271       add_lowerable_children(entry);
 272    }
 273
 274    stack.pop_back();
 275 }
 276
 277 void
 278 find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
 279                                             void *data)
 280 {
 281    find_lowerable_rvalues_visitor *state =
 282       (find_lowerable_rvalues_visitor *) data;
 283
 284    state->pop_stack_entry();
 285 }
 286
 287 enum find_lowerable_rvalues_visitor::can_lower_state
 288 find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
 289                                                  int precision) const
 290 {
 291    if (!can_lower_type(options, type))
 292       return CANT_LOWER;
 293
 294    switch (precision) {
 295    case GLSL_PRECISION_NONE:
 296       return UNKNOWN;
 297    case GLSL_PRECISION_HIGH:
 298       return CANT_LOWER;
 299    case GLSL_PRECISION_MEDIUM:
 300    case GLSL_PRECISION_LOW:
 301       return SHOULD_LOWER;
 302    }
 303
 304    return CANT_LOWER;
 305 }
 306
 307 enum find_lowerable_rvalues_visitor::parent_relation
 308 find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
 309                                                     ir_instruction *child)
 310 {
 311    /* If the parent is a dereference instruction then the only child could be
 312     * for example an array dereference and that should be lowered independently
 313     * of the parent.
 314     */
 315    if (parent->as_dereference())
 316       return INDEPENDENT_OPERATION;
 317
 318    /* The precision of texture sampling depend on the precision of the sampler.
 319     * The rest of the arguments don’t matter so we can treat it as an
 320     * independent operation.
 321     */
 322    if (parent->as_texture())
 323       return INDEPENDENT_OPERATION;
 324
 325    return COMBINED_OPERATION;
 326 }
 327
 328 ir_visitor_status
 329 find_lowerable_rvalues_visitor::visit(ir_constant *ir)
 330 {
 331    stack_enter(ir, this);
 332
 333    if (!can_lower_type(options, ir->type))
 334       stack.back().state = CANT_LOWER;
 335
 336    stack_leave(ir, this);
 337
 338    return visit_continue;
 339 }
 340
 341 ir_visitor_status
 342 find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
 343 {
 344    stack_enter(ir, this);
 345
 346    if (stack.back().state == UNKNOWN)
 347       stack.back().state = handle_precision(ir->type, ir->precision());
 348
 349    stack_leave(ir, this);
 350
 351    return visit_continue;
 352 }
 353
 354 ir_visitor_status
 355 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
 356 {
 357    ir_hierarchical_visitor::visit_enter(ir);
 358
 359    if (stack.back().state == UNKNOWN)
 360       stack.back().state = handle_precision(ir->type, ir->precision());
 361
 362    return visit_continue;
 363 }
 364
 365 ir_visitor_status
 366 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
 367 {
 368    ir_hierarchical_visitor::visit_enter(ir);
 369
 370    if (stack.back().state == UNKNOWN)
 371       stack.back().state = handle_precision(ir->type, ir->precision());
 372
 373    return visit_continue;
 374 }
 375
 376 ir_visitor_status
 377 find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
 378 {
 379    ir_hierarchical_visitor::visit_enter(ir);
 380
 381    /* The precision of the sample value depends on the precision of the
 382     * sampler.
 383     */
 384    stack.back().state = handle_precision(ir->type,
 385                                          ir->sampler->precision());
 386    return visit_continue;
 387 }
 388
 389 ir_visitor_status
 390 find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
 391 {
 392    ir_hierarchical_visitor::visit_enter(ir);
 393
 394    if (!can_lower_type(options, ir->type))
 395       stack.back().state = CANT_LOWER;
 396
 397    /* Don't lower precision for derivative calculations */
 398    if (!options->LowerPrecisionDerivatives &&
 399        (ir->operation == ir_unop_dFdx ||
 400         ir->operation == ir_unop_dFdx_coarse ||
 401         ir->operation == ir_unop_dFdx_fine ||
 402         ir->operation == ir_unop_dFdy ||
 403         ir->operation == ir_unop_dFdy_coarse ||
 404         ir->operation == ir_unop_dFdy_fine)) {
 405       stack.back().state = CANT_LOWER;
 406    }
 407
 408    return visit_continue;
 409 }
 410
 411 static bool
 412 function_always_returns_mediump_or_lowp(const char *name)
 413 {
 414    return !strcmp(name, "bitCount") ||
 415           !strcmp(name, "findLSB") ||
 416           !strcmp(name, "findMSB") ||
 417           !strcmp(name, "unpackHalf2x16") ||
 418           !strcmp(name, "unpackUnorm4x8") ||
 419           !strcmp(name, "unpackSnorm4x8");
 420 }
 421
 422 static bool
 423 is_lowerable_builtin(ir_call *ir,
 424                      const struct set *lowerable_rvalues)
 425 {
 426    /* The intrinsic call is inside the wrapper imageLoad function that will
 427     * be inlined. We have to handle both of them.
 428     */
 429    if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
 430        (ir->callee->is_builtin() &&
 431         !strcmp(ir->callee_name(), "imageLoad"))) {
 432       ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
 433       ir_variable *resource = param->variable_referenced();
 434
 435       assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
 436       assert(resource->type->without_array()->is_image());
 437
 438       /* GLSL ES 3.20 requires that images have a precision modifier, but if
 439        * you set one, it doesn't do anything, because all intrinsics are
 440        * defined with highp. This seems to be a spec bug.
 441        *
 442        * In theory we could set the return value to mediump if the image
 443        * format has a lower precision. This appears to be the most sensible
 444        * thing to do.
 445        */
 446       const struct util_format_description *desc =
 447          util_format_description(resource->data.image_format);
 448       int i =
 449          util_format_get_first_non_void_channel(resource->data.image_format);
 450       assert(i >= 0);
 451
 452       if (desc->channel[i].pure_integer ||
 453           desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
 454          return desc->channel[i].size <= 16;
 455       else
 456          return desc->channel[i].size <= 10; /* unorm/snorm */
 457    }
 458
 459    /* Handle special calls. */
 460    if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
 461       ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
 462       ir_variable *var = param->variable_referenced();
 463
 464       /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
 465        * be inlined by lower_precision() if we return true here, so that we can
 466        * get to ir_texture later and do proper lowering.
 467        *
 468        * We should lower the type of the return value if the sampler type
 469        * uses lower precision. The function parameters don't matter.
 470        */
 471       if (var && var->type->without_array()->is_sampler()) {
 472          /* textureSize always returns highp. */
 473          if (!strcmp(ir->callee_name(), "textureSize"))
 474             return false;
 475
 476          return var->data.precision == GLSL_PRECISION_MEDIUM ||
 477                 var->data.precision == GLSL_PRECISION_LOW;
 478       }
 479    }
 480
 481    if (!ir->callee->is_builtin() ||
 482        /* Parameters are always highp: */
 483        !strcmp(ir->callee_name(), "floatBitsToInt") ||
 484        !strcmp(ir->callee_name(), "floatBitsToUint") ||
 485        !strcmp(ir->callee_name(), "intBitsToFloat") ||
 486        !strcmp(ir->callee_name(), "uintBitsToFloat") ||
 487        !strcmp(ir->callee_name(), "bitfieldReverse") ||
 488        !strcmp(ir->callee_name(), "frexp") ||
 489        !strcmp(ir->callee_name(), "ldexp") ||
 490        /* Parameters and outputs are always highp: */
 491        /* TODO: The operations are highp, but carry and borrow outputs are lowp. */
 492        !strcmp(ir->callee_name(), "uaddCarry") ||
 493        !strcmp(ir->callee_name(), "usubBorrow") ||
 494        !strcmp(ir->callee_name(), "imulExtended") ||
 495        !strcmp(ir->callee_name(), "umulExtended") ||
 496        !strcmp(ir->callee_name(), "unpackUnorm2x16") ||
 497        !strcmp(ir->callee_name(), "unpackSnorm2x16") ||
 498        /* Outputs are highp: */
 499        !strcmp(ir->callee_name(), "packUnorm2x16") ||
 500        !strcmp(ir->callee_name(), "packSnorm2x16") ||
 501        /* Parameters are mediump and outputs are highp. The parameters should
 502         * be optimized in NIR, not here, e.g:
 503         * - packHalf2x16 can just be a bitcast from f16vec2 to uint32
 504         * - Other opcodes don't have to convert parameters to highp if the hw
 505         *   has f16 versions. Optimize in NIR accordingly.
 506         */
 507        !strcmp(ir->callee_name(), "packHalf2x16") ||
 508        !strcmp(ir->callee_name(), "packUnorm4x8") ||
 509        !strcmp(ir->callee_name(), "packSnorm4x8") ||
 510        /* Atomic functions are not lowered. */
 511        strstr(ir->callee_name(), "atomic") == ir->callee_name())
 512       return false;
 513
 514    assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
 515
 516    /* Number of parameters to check if they are lowerable. */
 517    unsigned check_parameters = ir->actual_parameters.length();
 518
 519    /* Interpolation functions only consider the precision of the interpolant. */
 520    /* Bitfield functions ignore the precision of "offset" and "bits". */
 521    if (!strcmp(ir->callee_name(), "interpolateAtOffset") ||
 522        !strcmp(ir->callee_name(), "interpolateAtSample") ||
 523        !strcmp(ir->callee_name(), "bitfieldExtract")) {
 524       check_parameters = 1;
 525    } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) {
 526       check_parameters = 2;
 527    } if (function_always_returns_mediump_or_lowp(ir->callee_name())) {
 528       /* These only lower the return value. Parameters keep their precision,
 529        * which is preserved in map_builtin.
 530        */
 531       check_parameters = 0;
 532    }
 533
 534    foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
 535       if (!check_parameters)
 536          break;
 537
 538       if (!param->as_constant() &&
 539           _mesa_set_search(lowerable_rvalues, param) == NULL)
 540          return false;
 541
 542       --check_parameters;
 543    }
 544
 545    return true;
 546 }
 547
 548 ir_visitor_status
 549 find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
 550 {
 551    ir_hierarchical_visitor::visit_leave(ir);
 552
 553    /* Special case for handling temporary variables generated by the compiler
 554     * for function calls. If we assign to one of these using a function call
 555     * that has a lowerable return type then we can assume the temporary
 556     * variable should have a medium precision too.
 557     */
 558
 559    /* Do nothing if the return type is void. */
 560    if (!ir->return_deref)
 561       return visit_continue;
 562
 563    ir_variable *var = ir->return_deref->variable_referenced();
 564
 565    assert(var->data.mode == ir_var_temporary);
 566
 567    unsigned return_precision = ir->callee->return_precision;
 568
 569    /* If the call is to a builtin, then the function won’t have a return
 570     * precision and we should determine it from the precision of the arguments.
 571     */
 572    if (is_lowerable_builtin(ir, lowerable_rvalues))
 573       return_precision = GLSL_PRECISION_MEDIUM;
 574
 575    can_lower_state lower_state =
 576       handle_precision(var->type, return_precision);
 577
 578    if (lower_state == SHOULD_LOWER) {
 579       /* There probably shouldn’t be any situations where multiple ir_call
 580        * instructions write to the same temporary?
 581        */
 582       assert(var->data.precision == GLSL_PRECISION_NONE);
 583       var->data.precision = GLSL_PRECISION_MEDIUM;
 584    } else {
 585       var->data.precision = GLSL_PRECISION_HIGH;
 586    }
 587
 588    return visit_continue;
 589 }
 590
 591 ir_visitor_status
 592 find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
 593 {
 594    ir_hierarchical_visitor::visit_leave(ir);
 595
 596    /* Special case for handling temporary variables generated by the compiler.
 597     * If we assign to one of these using a lowered precision then we can assume
 598     * the temporary variable should have a medium precision too.
 599     */
 600    ir_variable *var = ir->lhs->variable_referenced();
 601
 602    if (var->data.mode == ir_var_temporary) {
 603       if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
 604          /* Only override the precision if this is the first assignment. For
 605           * temporaries such as the ones generated for the ?: operator there
 606           * can be multiple assignments with different precisions. This way we
 607           * get the highest precision of all of the assignments.
 608           */
 609          if (var->data.precision == GLSL_PRECISION_NONE)
 610             var->data.precision = GLSL_PRECISION_MEDIUM;
 611       } else if (!ir->rhs->as_constant()) {
 612          var->data.precision = GLSL_PRECISION_HIGH;
 613       }
 614    }
 615
 616    return visit_continue;
 617 }
 618
 619 void
 620 find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
 621                        exec_list *instructions,
 622                        struct set *result)
 623 {
 624    find_lowerable_rvalues_visitor v(result, options);
 625
 626    visit_list_elements(&v, instructions);
 627
 628    assert(v.stack.empty());
 629 }
 630
 631 static const glsl_type *
 632 convert_type(bool up, const glsl_type *type)
 633 {
 634    if (type->is_array()) {
 635       return glsl_type::get_array_instance(convert_type(up, type->fields.array),
 636                                            type->array_size(),
 637                                            type->explicit_stride);
 638    }
 639
 640    glsl_base_type new_base_type;
 641
 642    if (up) {
 643       switch (type->base_type) {
 644       case GLSL_TYPE_FLOAT16:
 645          new_base_type = GLSL_TYPE_FLOAT;
 646          break;
 647       case GLSL_TYPE_INT16:
 648          new_base_type = GLSL_TYPE_INT;
 649          break;
 650       case GLSL_TYPE_UINT16:
 651          new_base_type = GLSL_TYPE_UINT;
 652          break;
 653       default:
 654          unreachable("invalid type");
 655          return NULL;
 656       }
 657    } else {
 658       switch (type->base_type) {
 659       case GLSL_TYPE_FLOAT:
 660          new_base_type = GLSL_TYPE_FLOAT16;
 661          break;
 662       case GLSL_TYPE_INT:
 663          new_base_type = GLSL_TYPE_INT16;
 664          break;
 665       case GLSL_TYPE_UINT:
 666          new_base_type = GLSL_TYPE_UINT16;
 667          break;
 668       default:
 669          unreachable("invalid type");
 670          return NULL;
 671       }
 672    }
 673
 674    return glsl_type::get_instance(new_base_type,
 675                                   type->vector_elements,
 676                                   type->matrix_columns,
 677                                   type->explicit_stride,
 678                                   type->interface_row_major);
 679 }
 680
/**
 * Return the 16-bit counterpart of a 32-bit float/int/uint type.
 *
 * Shorthand for convert_type() in the "down" direction (up == false).
 */
static const glsl_type *
lower_glsl_type(const glsl_type *type)
{
   return convert_type(false, type);
}
 686
 687 static ir_rvalue *
 688 convert_precision(bool up, ir_rvalue *ir)
 689 {
 690    unsigned op;
 691
 692    if (up) {
 693       switch (ir->type->base_type) {
 694       case GLSL_TYPE_FLOAT16:
 695          op = ir_unop_f162f;
 696          break;
 697       case GLSL_TYPE_INT16:
 698          op = ir_unop_i2i;
 699          break;
 700       case GLSL_TYPE_UINT16:
 701          op = ir_unop_u2u;
 702          break;
 703       default:
 704          unreachable("invalid type");
 705          return NULL;
 706       }
 707    } else {
 708       switch (ir->type->base_type) {
 709       case GLSL_TYPE_FLOAT:
 710          op = ir_unop_f2fmp;
 711          break;
 712       case GLSL_TYPE_INT:
 713          op = ir_unop_i2imp;
 714          break;
 715       case GLSL_TYPE_UINT:
 716          op = ir_unop_u2ump;
 717          break;
 718       default:
 719          unreachable("invalid type");
 720          return NULL;
 721       }
 722    }
 723
 724    const glsl_type *desired_type = convert_type(up, ir->type);
 725    void *mem_ctx = ralloc_parent(ir);
 726    return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
 727 }
 728
 729 void
 730 lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
 731 {
 732    ir_rvalue *ir = *rvalue;
 733
 734    if (ir == NULL)
 735       return;
 736
 737    if (ir->as_dereference()) {
 738       if (!ir->type->is_boolean())
 739          *rvalue = convert_precision(false, ir);
 740    } else if (ir->type->is_32bit()) {
 741       ir->type = lower_glsl_type(ir->type);
 742
 743       ir_constant *const_ir = ir->as_constant();
 744
 745       if (const_ir) {
 746          ir_constant_data value;
 747
 748          if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
 749             for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
 750                value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
 751          } else if (ir->type->base_type == GLSL_TYPE_INT16) {
 752             for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
 753                value.i16[i] = const_ir->value.i[i];
 754          } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
 755             for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
 756                value.u16[i] = const_ir->value.u[i];
 757          } else {
 758             unreachable("invalid type");
 759          }
 760
 761          const_ir->value = value;
 762       }
 763    }
 764 }
 765
/**
 * Enter a record dereference while lowering.
 *
 * Always returns visit_continue_with_parent: the dereferenced variable
 * itself is not to be lowered.
 */
ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_record *ir)
{
   /* We don't want to lower the variable */
   return visit_continue_with_parent;
}
 772
/**
 * Enter an array dereference while lowering.
 *
 * Always returns visit_continue_with_parent: neither the array variable nor
 * its index is converted here.
 */
ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_array *ir)
{
   /* We don't want to convert the array index or the variable. If the array
    * index itself is lowerable that will be handled separately.
    */
   return visit_continue_with_parent;
}
 781
/**
 * Enter a call while lowering.
 *
 * Always returns visit_continue_with_parent: the call's arguments are not
 * converted here.
 */
ir_visitor_status
lower_precision_visitor::visit_enter(ir_call *ir)
{
   /* We don't want to convert the arguments. These will be handled separately.
    */
   return visit_continue_with_parent;
}
 789
/**
 * Enter a texture operation while lowering.
 *
 * Always returns visit_continue_with_parent: the texture arguments are not
 * converted here.
 */
ir_visitor_status
lower_precision_visitor::visit_enter(ir_texture *ir)
{
   /* We don't want to convert the arguments. These will be handled separately.
    */
   return visit_continue_with_parent;
}
 797
 798 ir_visitor_status
 799 lower_precision_visitor::visit_leave(ir_expression *ir)
 800 {
 801    ir_rvalue_visitor::visit_leave(ir);
 802
 803    /* If the expression is a conversion operation to or from bool then fix the
 804     * operation.
 805     */
 806    switch (ir->operation) {
 807    case ir_unop_b2f:
 808       ir->operation = ir_unop_b2f16;
 809       break;
 810    case ir_unop_f2b:
 811       ir->operation = ir_unop_f162b;
 812       break;
 813    case ir_unop_b2i:
 814    case ir_unop_i2b:
 815       /* Nothing to do - they both support int16. */
 816       break;
 817    default:
 818       break;
 819    }
 820
 821    return visit_continue;
 822 }
 823
 824 void
 825 find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
 826 {
 827    /* Checking the precision of rvalue can be lowered first throughout
 828     * find_lowerable_rvalues_visitor.
 829     * Once it found the precision of rvalue can be lowered, then we can
 830     * add conversion f2fmp, etc. through lower_precision_visitor.
 831     */
 832    if (*rvalue == NULL)
 833       return;
 834
 835    struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);
 836
 837    if (!entry)
 838       return;
 839
 840    _mesa_set_remove(lowerable_rvalues, entry);
 841
 842    /* If the entire expression is just a variable dereference then trying to
 843     * lower it will just directly add pointless to and from conversions without
 844     * any actual operation in-between. Although these will eventually get
 845     * optimised out, avoiding generating them here also avoids breaking inout
 846     * parameters to functions.
 847     */
 848    if ((*rvalue)->as_dereference())
 849       return;
 850
 851    lower_precision_visitor v;
 852
 853    (*rvalue)->accept(&v);
 854    v.handle_rvalue(rvalue);
 855
 856    /* We don’t need to add the final conversion if the final type has been
 857     * converted to bool
 858     */
 859    if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) {
 860       *rvalue = convert_precision(true, *rvalue);
 861    }
 862 }
 863
 864 ir_visitor_status
 865 find_precision_visitor::visit_enter(ir_call *ir)
 866 {
 867    ir_rvalue_enter_visitor::visit_enter(ir);
 868
 869    ir_variable *ret_var = NULL;
 870    if (ir->return_deref)
 871       ret_var = ir->return_deref->variable_referenced();
 872
 873    /* image_load is left alone. Only its return value was changed to
 874     * mediump/lowp, so that the following instructions can use reduced
 875     * precision. The return type of the intrinsic itself is not changed
 876     * here, but it can be changed in NIR if all users use the *2*mp opcode.
 877     */
 878    if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
 879       return visit_continue;
 880
 881    /* The builtin implementation is replaced with a lowered version only
 882     * for a non-intrinsic builtin whose temporary return variable had its
 883     * precision overridden by find_lowerable_rvalues_visitor.
 884     */
 885    if (!ir->callee->is_builtin() || ir->callee->is_intrinsic())
 886       return visit_continue;
 887    if (ret_var == NULL)
 888       return visit_continue;
 889    if (ret_var->data.precision != GLSL_PRECISION_MEDIUM &&
 890        ret_var->data.precision != GLSL_PRECISION_LOW)
 891       return visit_continue;
 892
 893    /* Inline the lowered signature in place of the call, then drop it. */
 894    ir->callee = map_builtin(ir->callee);
 895    ir->generate_inline(ir);
 896    ir->remove();
 897
 898    return visit_continue_with_parent;
 899 }
 900
 901 ir_function_signature *
 902 find_precision_visitor::map_builtin(ir_function_signature *sig)
 903 {
 904    if (lowered_builtins != NULL) {
 905       /* Reuse the lowered signature if this builtin was mapped before. */
 906       struct hash_entry *const cached =
 907          _mesa_hash_table_search(lowered_builtins, sig);
 908       if (cached != NULL)
 909          return static_cast<ir_function_signature *>(cached->data);
 910    } else {
 911       /* First call: create the cache, clone table and memory context. */
 912       lowered_builtins = _mesa_pointer_hash_table_create(NULL);
 913       clone_ht = _mesa_pointer_hash_table_create(NULL);
 914       lowered_builtin_mem_ctx = ralloc_context(NULL);
 915    }
 916
 917    ir_function_signature *const copy =
 918       sig->clone(lowered_builtin_mem_ctx, clone_ht);
 919
 920    /* Parameters become mediump, except for functions that always return
 921     * mediump or lowp: their parameters can be highp and are kept intact.
 922     * NIR can lower the up-conversion for parameters if needed.
 923     */
 924    if (!function_always_returns_mediump_or_lowp(sig->function_name())) {
 925       foreach_in_list(ir_variable, param, &copy->parameters) {
 926          param->data.precision = GLSL_PRECISION_MEDIUM;
 927       }
 928    }
 929
 930    lower_precision(options, &copy->body);
 931    _mesa_hash_table_clear(clone_ht, NULL);
 932    _mesa_hash_table_insert(lowered_builtins, sig, copy);
 933    return copy;
 934 }
 935
 936 find_precision_visitor::find_precision_visitor(
 937    const struct gl_shader_compiler_options *options)
 938    : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
 939      lowered_builtins(NULL), clone_ht(NULL), lowered_builtin_mem_ctx(NULL),
 940      options(options)
 941 {
 942    /* The builtin-lowering tables are created on demand by map_builtin(). */
 943 }
 944
 945 find_precision_visitor::~find_precision_visitor()
 946 {
 947    _mesa_set_destroy(lowerable_rvalues, NULL);
 948    /* The remaining members only exist once map_builtin() has run. */
 949    if (lowered_builtins == NULL)
 950       return;
 951    _mesa_hash_table_destroy(lowered_builtins, NULL);
 952    _mesa_hash_table_destroy(clone_ht, NULL);
 953    ralloc_free(lowered_builtin_mem_ctx);
 954 }
 955
 956 /* Lowering opcodes to 16 bits does not cover programs with control flow
 957  * (including the ?: operator, which the IR represents as if-then-else):
 958  * the temporary variables used for passing values between code blocks are
 959  * not lowered, which results in 32-bit phis in NIR.
 960  *
 961  * This visitor first changes the variable types to 16 bits, then changes
 962  * all ir_dereference types to 16 bits.
 963  */
 964 class lower_variables_visitor : public ir_rvalue_enter_visitor {
 965 public:
 966    lower_variables_visitor(const struct gl_shader_compiler_options *options)
 967       : options(options), lower_vars(_mesa_pointer_set_create(NULL))
 968    {
 969    }
 970
 971    virtual ~lower_variables_visitor() { _mesa_set_destroy(lower_vars, NULL); }
 972
 973    /* Visitor callbacks. */
 974    virtual ir_visitor_status visit(ir_variable *var);
 975    virtual ir_visitor_status visit_enter(ir_assignment *ir);
 976    virtual ir_visitor_status visit_enter(ir_return *ir);
 977    virtual ir_visitor_status visit_enter(ir_call *ir);
 978    virtual void handle_rvalue(ir_rvalue **rvalue);
 979
 980    /* Helpers used by the callbacks. */
 981    void fix_types_in_deref_chain(ir_dereference *ir);
 982    void convert_split_assignment(ir_dereference *lhs, ir_rvalue *rhs,
 983                                  bool insert_before);
 984
 985    const struct gl_shader_compiler_options *options;
 986    /* Set of the variables whose type visit(ir_variable *) has lowered. */
 987    set *lower_vars;
 988 };
 989
 990 static void
 991 lower_constant(ir_constant *ir)
 992 {
 993    /* For an array only the elements and the array type are lowered. */
 994    const bool is_array = ir->type->is_array();
 995    if (is_array) {
 996       for (int elem = 0; elem < ir->type->array_size(); elem++)
 997          lower_constant(ir->get_array_element(elem));
 998    }
 999    ir->type = lower_glsl_type(ir->type);
1000    if (is_array)
1001       return;
1002
1003    /* Rewrite the 32-bit data into the storage of the new base type. */
1004    ir_constant_data lowered;
1005    if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
1006       for (unsigned c = 0; c < ARRAY_SIZE(lowered.f16); c++)
1007          lowered.f16[c] = _mesa_float_to_half(ir->value.f[c]);
1008    } else if (ir->type->base_type == GLSL_TYPE_INT16) {
1009       for (unsigned c = 0; c < ARRAY_SIZE(lowered.i16); c++)
1010          lowered.i16[c] = ir->value.i[c];
1011    } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
1012       for (unsigned c = 0; c < ARRAY_SIZE(lowered.u16); c++)
1013          lowered.u16[c] = ir->value.u[c];
1014    } else {
1015       unreachable("invalid type");
1016    }
1017    ir->value = lowered;
1018 }
1019
1020 ir_visitor_status
1021 lower_variables_visitor::visit(ir_variable *var)
1022 {
1023    const bool local_mode = var->data.mode == ir_var_temporary ||
1024                            var->data.mode == ir_var_auto;
1025    const bool reduced = var->data.precision == GLSL_PRECISION_MEDIUM ||
1026                         var->data.precision == GLSL_PRECISION_LOW;
1027
1028    /* Skip all but lowerable 32-bit mediump/lowp temporaries and autos. */
1029    if (!local_mode || !var->type->without_array()->is_32bit() ||
1030        !reduced || !can_lower_type(options, var->type))
1031       return visit_continue;
1032
1033    /* Lower the constant value and the constant initializer when their type
1034     * is the variable's type. If such a constant exists but constants must
1035     * not be lowered, the variable is left as it is.
1036     */
1037    ir_constant **const slots[] = {
1038       &var->constant_value, &var->constant_initializer
1039    };
1040    for (unsigned s = 0; s < ARRAY_SIZE(slots); s++) {
1041       ir_constant **const slot = slots[s];
1042       if (*slot == NULL || (*slot)->type != var->type)
1043          continue;
1044       if (!options->LowerPrecisionConstants)
1045          return visit_continue;
1046       *slot = (*slot)->clone(ralloc_parent(var), NULL);
1047       lower_constant(*slot);
1048    }
1049
1050    var->type = lower_glsl_type(var->type);
1051    _mesa_set_add(lower_vars, var);
1052
1053    return visit_continue;
1054 }
1055
1056 void
1057 lower_variables_visitor::fix_types_in_deref_chain(ir_dereference *ir)
1058 {
1059    assert(ir->type->without_array()->is_32bit());
1060    assert(_mesa_set_search(lower_vars, ir->variable_referenced()));
1061
1062    /* Lower the node itself, then every array it is indexing into. */
1063    ir->type = lower_glsl_type(ir->type);
1064
1065    ir_dereference_array *link = ir->as_dereference_array();
1066    while (link != NULL) {
1067       ir_rvalue *const indexed = link->array;
1068       assert(indexed->type->without_array()->is_32bit());
1069       indexed->type = lower_glsl_type(indexed->type);
1070       link = indexed->as_dereference_array();
1071    }
1072 }
1073
1074 void
1075 lower_variables_visitor::convert_split_assignment(ir_dereference *lhs,
1076                                                   ir_rvalue *rhs,
1077                                                   bool insert_before)
1078 {
1079    void *const ctx = ralloc_parent(lhs);
1080
1081    if (!lhs->type->is_array()) {
1082       /* Exactly one of the two sides is 16-bit; the other is 32-bit. */
1083       assert(lhs->type->is_16bit() || lhs->type->is_32bit());
1084       assert(rhs->type->is_16bit() || rhs->type->is_32bit());
1085       assert(lhs->type->is_16bit() != rhs->type->is_16bit());
1086
1087       ir_assignment *const copy =
1088          new(ctx) ir_assignment(lhs, convert_precision(lhs->type->is_32bit(), rhs));
1089
1090       if (insert_before)
1091          base_ir->insert_before(copy);
1092       else
1093          base_ir->insert_after(copy);
1094       return;
1095    }
1096
1097    /* Arrays are handled one element at a time through recursion. */
1098    for (unsigned idx = 0; idx < lhs->type->length; idx++) {
1099       ir_dereference *const dst = new(ctx)
1100          ir_dereference_array(lhs->clone(ctx, NULL), new(ctx) ir_constant(idx));
1101       ir_dereference *const src = new(ctx)
1102          ir_dereference_array(rhs->clone(ctx, NULL), new(ctx) ir_constant(idx));
1103       convert_split_assignment(dst, src, insert_before);
1104    }
1105 }
1106
1107 ir_visitor_status
1108 lower_variables_visitor::visit_enter(ir_assignment *ir)
1109 {
1110    ir_dereference *const lhs = ir->lhs;
1111    ir_variable *const var = lhs->variable_referenced();
1112    ir_constant *const rhs_const = ir->rhs->as_constant();
1113    ir_dereference *const rhs_deref = ir->rhs->as_dereference();
1114    ir_variable *const rhs_var =
1115       rhs_deref ? rhs_deref->variable_referenced() : NULL;
1116
1117    /* Array assignments between lowered and non-lowered variables must be
1118     * legalized. A variable RHS qualifies when the two variables differ in
1119     * being 16-bit; a constant RHS when the LHS is 16-bit and it is 32-bit.
1120     */
1121    const bool var_rule = !rhs_var ||
1122       (var && var->type->without_array()->is_16bit() !=
1123               rhs_var->type->without_array()->is_16bit());
1124    const bool const_rule = !rhs_const ||
1125       (var && var->type->without_array()->is_16bit() &&
1126        rhs_const->type->without_array()->is_32bit());
1127
1128    if (lhs->type->is_array() && (rhs_var || rhs_const) &&
1129        var_rule && const_rule) {
1130       assert(ir->rhs->type->is_array());
1131
1132       /* Pick the dereference whose type must be fixed: the RHS when
1133        * assigning from lowered to non-lowered, otherwise the LHS when
1134        * assigning 32-bit data from non-lowered to lowered.
1135        */
1136       ir_dereference *stale = NULL;
1137       if (rhs_var && _mesa_set_search(lower_vars, rhs_var))
1138          stale = rhs_deref;
1139       else if (var && _mesa_set_search(lower_vars, var) &&
1140                ir->rhs->type->without_array()->is_32bit())
1141          stale = lhs;
1142
1143       if (stale != NULL) {
1144          fix_types_in_deref_chain(stale);
1145          /* Replace the assignment with split, converting assignments. */
1146          convert_split_assignment(lhs, ir->rhs, true);
1147          ir->remove();
1148          return visit_continue;
1149       }
1150    }
1151
1152    /* The fixes below only apply to assignments to a lowered variable. */
1153    if (var == NULL || !_mesa_set_search(lower_vars, var))
1154       return ir_rvalue_enter_visitor::visit_enter(ir);
1155
1156    /* Fix the LHS type. */
1157    if (lhs->type->without_array()->is_32bit())
1158       fix_types_in_deref_chain(lhs);
1159
1160    /* Fix the RHS type if it's a lowered variable. */
1161    if (rhs_var &&
1162        _mesa_set_search(lower_vars, rhs_var) &&
1163        rhs_deref->type->without_array()->is_32bit())
1164       fix_types_in_deref_chain(rhs_deref);
1165
1166    /* Fix the RHS type if it's a non-array expression. */
1167    if (ir->rhs->type->is_32bit()) {
1168       ir_expression *const expr = ir->rhs->as_expression();
1169       const bool up_conversion =
1170          expr &&
1171          (expr->operation == ir_unop_f162f ||
1172           expr->operation == ir_unop_i2i ||
1173           expr->operation == ir_unop_u2u) &&
1174          expr->operands[0]->type->is_16bit();
1175
1176       /* Just remove an "up" conversion (optional: NIR could eliminate the
1177        * up+down conversions too); otherwise add a "down" conversion.
1178        */
1179       if (up_conversion)
1180          ir->rhs = expr->operands[0];
1181       else
1182          ir->rhs = convert_precision(false, ir->rhs);
1183    }
1184
1185    return ir_rvalue_enter_visitor::visit_enter(ir);
1186 }
1187
1188 ir_visitor_status
1189 lower_variables_visitor::visit_enter(ir_return *ir)
1190 {
1191    void *const ctx = ralloc_parent(ir);
1192
1193    ir_dereference *deref = NULL;
1194    if (ir->value)
1195       deref = ir->value->as_dereference();
1196    ir_variable *const var = deref ? deref->variable_referenced() : NULL;
1197
1198    /* Only a still-32-bit dereference of a lowered variable needs fixing. */
1199    const bool fix_return = var &&
1200                            _mesa_set_search(lower_vars, var) &&
1201                            deref->type->without_array()->is_32bit();
1202    if (fix_return) {
1203       /* Create the temporary while the dereference type is still 32-bit. */
1204       ir_variable *const tmp =
1205          new(ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
1206       base_ir->insert_before(tmp);
1207
1208       fix_types_in_deref_chain(deref);
1209
1210       /* Convert to 32 bits into the temporary and return that instead. */
1211       convert_split_assignment(new(ctx) ir_dereference_variable(tmp),
1212                                deref, true);
1213       ir->value = new(ctx) ir_dereference_variable(tmp);
1214    }
1215
1216    return ir_rvalue_enter_visitor::visit_enter(ir);
1217 }
1218
1219 void
1220 lower_variables_visitor::handle_rvalue(ir_rvalue **rvalue)
1221 {
1222    ir_rvalue *const ir = *rvalue;
1223
1224    /* Nothing is rewritten on the assignee side or for an empty slot. */
1225    if (ir == NULL || in_assignee)
1226       return;
1227
1228    /* Remove f2fmp(float16). Same for int16 and uint16: a conversion that
1229     * yields a 16-bit type from a 32-bit dereference of a lowered variable
1230     * is replaced by the dereference itself, once its types are fixed.
1231     */
1232    ir_expression *const expr = ir->as_expression();
1233    if (expr != NULL) {
1234       const bool is_conversion =
1235          expr->operation == ir_unop_f2fmp ||
1236          expr->operation == ir_unop_i2imp ||
1237          expr->operation == ir_unop_u2ump ||
1238          expr->operation == ir_unop_f2f16 ||
1239          expr->operation == ir_unop_i2i ||
1240          expr->operation == ir_unop_u2u;
1241       ir_dereference *const src = expr->operands[0]->as_dereference();
1242
1243       if (src != NULL && is_conversion &&
1244           expr->type->without_array()->is_16bit() &&
1245           src->type->without_array()->is_32bit() &&
1246           src->variable_referenced() &&
1247           _mesa_set_search(lower_vars, src->variable_referenced())) {
1248          fix_types_in_deref_chain(src);
1249          *rvalue = src;
1250          return;
1251       }
1252    }
1253
1254    /* Otherwise a 32-bit dereference of a lowered variable is read through
1255     * a temporary. var can be NULL if we are dereferencing ir_constant.
1256     */
1257    ir_dereference *const deref = ir->as_dereference();
1258    ir_variable *const var = deref ? deref->variable_referenced() : NULL;
1259    if (var == NULL || !_mesa_set_search(lower_vars, var) ||
1260        !deref->type->without_array()->is_32bit())
1261       return;
1262
1263    /* Create a 32-bit temporary variable. */
1264    void *const ctx = ralloc_parent(ir);
1265    ir_variable *const tmp =
1266       new(ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
1267    base_ir->insert_before(tmp);
1268
1269    fix_types_in_deref_chain(deref);
1270
1271    convert_split_assignment(new(ctx) ir_dereference_variable(tmp),
1272                             deref, true);
1273    *rvalue = new(ctx) ir_dereference_variable(tmp);
1274 }
1275
1276 ir_visitor_status
1277 lower_variables_visitor::visit_enter(ir_call *ir)
1278 {
1279    void *const ctx = ralloc_parent(ir);
1280
1281    /* A lowered (16-bit) variable can't be passed as a 32-bit parameter, so
1282     * every such argument is routed through a 32-bit temporary.
1283     */
1284    foreach_two_lists(formal_node, &ir->callee->parameters,
1285                      actual_node, &ir->actual_parameters) {
1286       ir_variable *const formal = (ir_variable *)formal_node;
1287       ir_dereference *const actual =
1288          ((ir_rvalue *)actual_node)->as_dereference();
1289       if (actual == NULL)
1290          continue;
1291
1292       /* The variable can be NULL if we are dereferencing ir_constant. */
1293       ir_variable *const var = actual->variable_referenced();
1294       if (var == NULL || !_mesa_set_search(lower_vars, var) ||
1295           !formal->type->without_array()->is_32bit())
1296          continue;
1297
1298       fix_types_in_deref_chain(actual);
1299
1300       /* Create a 32-bit temporary variable for the parameter. */
1301       ir_variable *const tmp =
1302          new(ctx) ir_variable(formal->type, "lowerp", ir_var_temporary);
1303       base_ir->insert_before(tmp);
1304
1305       /* Pass the temporary in place of the original argument. */
1306       actual_node->replace_with(new(ctx) ir_dereference_variable(tmp));
1307
1308       const bool is_inout = formal->data.mode == ir_var_function_inout;
1309       if (is_inout || formal->data.mode == ir_var_function_in) {
1310          /* Convert to 32 bits for passing in. */
1311          convert_split_assignment(new(ctx) ir_dereference_variable(tmp),
1312                                   actual->clone(ctx, NULL), true);
1313       }
1314       if (is_inout || formal->data.mode == ir_var_function_out) {
1315          /* Convert to 16 bits after returning. */
1316          convert_split_assignment(actual,
1317                                   new(ctx) ir_dereference_variable(tmp),
1318                                   false);
1319       }
1320    }
1321
1322    /* Fix the type of return value dereferences. */
1323    ir_dereference_variable *const ret_deref = ir->return_deref;
1324    ir_variable *const ret_var =
1325       ret_deref ? ret_deref->variable_referenced() : NULL;
1326
1327    if (ret_var == NULL ||
1328        !_mesa_set_search(lower_vars, ret_var) ||
1329        !ret_deref->type->without_array()->is_32bit())
1330       return ir_rvalue_enter_visitor::visit_enter(ir);
1331
1332    /* Create a 32-bit temporary variable. */
1333    ir_variable *const tmp =
1334       new(ctx) ir_variable(ir->callee->return_type, "lowerp",
1335                            ir_var_temporary);
1336    base_ir->insert_before(tmp);
1337
1338    /* Make the call return into the temporary. */
1339    ret_deref->var = tmp;
1340
1341    /* Convert to 16 bits after returning. */
1342    convert_split_assignment(new(ctx) ir_dereference_variable(ret_var),
1343                             new(ctx) ir_dereference_variable(tmp),
1344                             false);
1345    return ir_rvalue_enter_visitor::visit_enter(ir);
1346 }
1347
1348 }
1349
1350 void
1351 lower_precision(const struct gl_shader_compiler_options *options,
1352                 exec_list *instructions)
1353 {
1354    /* Lower the rvalues first, then the variables that carry them. */
1355    find_precision_visitor rvalue_pass(options);
1356    find_lowerable_rvalues(options, instructions, rvalue_pass.lowerable_rvalues);
1357    visit_list_elements(&rvalue_pass, instructions);
1358
1359    lower_variables_visitor variable_pass(options);
1360    visit_list_elements(&variable_pass, instructions);
1361 }