src/compiler/glsl/lower_precision.cpp

   1 /*
   2  * Copyright © 2019 Google, Inc
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file lower_precision.cpp
  26  */
  27
  28 #include "main/macros.h"
  29 #include "main/mtypes.h"
  30 #include "compiler/glsl_types.h"
  31 #include "ir.h"
  32 #include "ir_builder.h"
  33 #include "ir_optimization.h"
  34 #include "ir_rvalue_visitor.h"
  35 #include "util/half_float.h"
  36 #include "util/set.h"
  37 #include "util/hash_table.h"
  38 #include <vector>
  39
  40 namespace {
  41
  42 class find_precision_visitor : public ir_rvalue_enter_visitor {
  43 public:
  44    find_precision_visitor(const struct gl_shader_compiler_options *options);
  45    ~find_precision_visitor();
  46
  47    virtual void handle_rvalue(ir_rvalue **rvalue);
  48    virtual ir_visitor_status visit_enter(ir_call *ir);
  49
  50    ir_function_signature *map_builtin(ir_function_signature *sig);
  51
  52    /* Set of rvalues that can be lowered. This will be filled in by
  53     * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
  54     * will be added to this set.
  55     */
  56    struct set *lowerable_rvalues;
  57
  58    /**
  59     * A mapping of builtin signature functions to lowered versions. This is
  60     * filled in lazily when a lowered version is needed.
  61     */
  62    struct hash_table *lowered_builtins;
  63    /**
  64     * A temporary hash table only used in order to clone functions.
  65     */
  66    struct hash_table *clone_ht;
  67
  68    void *lowered_builtin_mem_ctx;
  69
  70    const struct gl_shader_compiler_options *options;
  71 };
  72
  73 class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
  74 public:
  75    enum can_lower_state {
  76       UNKNOWN,
  77       CANT_LOWER,
  78       SHOULD_LOWER,
  79    };
  80
  81    enum parent_relation {
  82       /* The parent performs a further operation involving the result from the
  83        * child and can be lowered along with it.
  84        */
  85       COMBINED_OPERATION,
  86       /* The parent instruction’s operation is independent of the child type so
  87        * the child should be lowered separately.
  88        */
  89       INDEPENDENT_OPERATION,
  90    };
  91
  92    struct stack_entry {
  93       ir_instruction *instr;
  94       enum can_lower_state state;
  95       /* List of child rvalues that can be lowered. When this stack entry is
  96        * popped, if this node itself can’t be lowered than all of the children
  97        * are root nodes to lower so we will add them to lowerable_rvalues.
  98        * Otherwise if this node can also be lowered then we won’t add the
  99        * children because we only want to add the topmost lowerable nodes to
 100        * lowerable_rvalues and the children will be lowered as part of lowering
 101        * this node.
 102        */
 103       std::vector<ir_instruction *> lowerable_children;
 104    };
 105
 106    find_lowerable_rvalues_visitor(struct set *result,
 107                                   const struct gl_shader_compiler_options *options);
 108
 109    static void stack_enter(class ir_instruction *ir, void *data);
 110    static void stack_leave(class ir_instruction *ir, void *data);
 111
 112    virtual ir_visitor_status visit(ir_constant *ir);
 113    virtual ir_visitor_status visit(ir_dereference_variable *ir);
 114
 115    virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
 116    virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
 117    virtual ir_visitor_status visit_enter(ir_texture *ir);
 118    virtual ir_visitor_status visit_enter(ir_expression *ir);
 119
 120    virtual ir_visitor_status visit_leave(ir_assignment *ir);
 121    virtual ir_visitor_status visit_leave(ir_call *ir);
 122
 123    can_lower_state handle_precision(const glsl_type *type,
 124                                     int precision) const;
 125
 126    static parent_relation get_parent_relation(ir_instruction *parent,
 127                                               ir_instruction *child);
 128
 129    std::vector<stack_entry> stack;
 130    struct set *lowerable_rvalues;
 131    const struct gl_shader_compiler_options *options;
 132
 133    void pop_stack_entry();
 134    void add_lowerable_children(const stack_entry &entry);
 135 };
 136
 137 class lower_precision_visitor : public ir_rvalue_visitor {
 138 public:
 139    virtual void handle_rvalue(ir_rvalue **rvalue);
 140    virtual ir_visitor_status visit_enter(ir_dereference_array *);
 141    virtual ir_visitor_status visit_enter(ir_dereference_record *);
 142    virtual ir_visitor_status visit_enter(ir_call *ir);
 143    virtual ir_visitor_status visit_enter(ir_texture *ir);
 144    virtual ir_visitor_status visit_leave(ir_expression *);
 145 };
 146
 147 static bool
 148 can_lower_type(const struct gl_shader_compiler_options *options,
 149                const glsl_type *type)
 150 {
 151    /* Don’t lower any expressions involving non-float types except bool and
 152     * texture samplers. This will rule out operations that change the type such
 153     * as conversion to ints. Instead it will end up lowering the arguments
 154     * instead and adding a final conversion to float32. We want to handle
 155     * boolean types so that it will do comparisons as 16-bit.
 156     */
 157
 158    switch (type->without_array()->base_type) {
 159    /* TODO: should we do anything for these two with regard to Int16 vs FP16
 160     * support?
 161     */
 162    case GLSL_TYPE_BOOL:
 163    case GLSL_TYPE_SAMPLER:
 164    case GLSL_TYPE_IMAGE:
 165       return true;
 166
 167    case GLSL_TYPE_FLOAT:
 168       return options->LowerPrecisionFloat16;
 169
 170    case GLSL_TYPE_UINT:
 171    case GLSL_TYPE_INT:
 172       return options->LowerPrecisionInt16;
 173
 174    default:
 175       return false;
 176    }
 177 }
 178
 179 find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
 180                                  const struct gl_shader_compiler_options *opts)
 181 {
 182    lowerable_rvalues = res;
 183    options = opts;
 184    callback_enter = stack_enter;
 185    callback_leave = stack_leave;
 186    data_enter = this;
 187    data_leave = this;
 188 }
 189
 190 void
 191 find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
 192                                             void *data)
 193 {
 194    find_lowerable_rvalues_visitor *state =
 195       (find_lowerable_rvalues_visitor *) data;
 196
 197    /* Add a new stack entry for this instruction */
 198    stack_entry entry;
 199
 200    entry.instr = ir;
 201    entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;
 202
 203    state->stack.push_back(entry);
 204 }
 205
 206 void
 207 find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
 208 {
 209    /* We can’t lower this node so if there were any pending children then they
 210     * are all root lowerable nodes and we should add them to the set.
 211     */
 212    for (auto &it : entry.lowerable_children)
 213       _mesa_set_add(lowerable_rvalues, it);
 214 }
 215
 216 void
 217 find_lowerable_rvalues_visitor::pop_stack_entry()
 218 {
 219    const stack_entry &entry = stack.back();
 220
 221    if (stack.size() >= 2) {
 222       /* Combine this state into the parent state, unless the parent operation
 223        * doesn’t have any relation to the child operations
 224        */
 225       stack_entry &parent = stack.end()[-2];
 226       parent_relation rel = get_parent_relation(parent.instr, entry.instr);
 227
 228       if (rel == COMBINED_OPERATION) {
 229          switch (entry.state) {
 230          case CANT_LOWER:
 231             parent.state = CANT_LOWER;
 232             break;
 233          case SHOULD_LOWER:
 234             if (parent.state == UNKNOWN)
 235                parent.state = SHOULD_LOWER;
 236             break;
 237          case UNKNOWN:
 238             break;
 239          }
 240       }
 241    }
 242
 243    if (entry.state == SHOULD_LOWER) {
 244       ir_rvalue *rv = entry.instr->as_rvalue();
 245
 246       if (rv == NULL) {
 247          add_lowerable_children(entry);
 248       } else if (stack.size() >= 2) {
 249          stack_entry &parent = stack.end()[-2];
 250
 251          switch (get_parent_relation(parent.instr, rv)) {
 252          case COMBINED_OPERATION:
 253             /* We only want to add the toplevel lowerable instructions to the
 254              * lowerable set. Therefore if there is a parent then instead of
 255              * adding this instruction to the set we will queue depending on
 256              * the result of the parent instruction.
 257              */
 258             parent.lowerable_children.push_back(entry.instr);
 259             break;
 260          case INDEPENDENT_OPERATION:
 261             _mesa_set_add(lowerable_rvalues, rv);
 262             break;
 263          }
 264       } else {
 265          /* This is a toplevel node so add it directly to the lowerable
 266           * set.
 267           */
 268          _mesa_set_add(lowerable_rvalues, rv);
 269       }
 270    } else if (entry.state == CANT_LOWER) {
 271       add_lowerable_children(entry);
 272    }
 273
 274    stack.pop_back();
 275 }
 276
 277 void
 278 find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
 279                                             void *data)
 280 {
 281    find_lowerable_rvalues_visitor *state =
 282       (find_lowerable_rvalues_visitor *) data;
 283
 284    state->pop_stack_entry();
 285 }
 286
 287 enum find_lowerable_rvalues_visitor::can_lower_state
 288 find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
 289                                                  int precision) const
 290 {
 291    if (!can_lower_type(options, type))
 292       return CANT_LOWER;
 293
 294    switch (precision) {
 295    case GLSL_PRECISION_NONE:
 296       return UNKNOWN;
 297    case GLSL_PRECISION_HIGH:
 298       return CANT_LOWER;
 299    case GLSL_PRECISION_MEDIUM:
 300    case GLSL_PRECISION_LOW:
 301       return SHOULD_LOWER;
 302    }
 303
 304    return CANT_LOWER;
 305 }
 306
 307 enum find_lowerable_rvalues_visitor::parent_relation
 308 find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
 309                                                     ir_instruction *child)
 310 {
 311    /* If the parent is a dereference instruction then the only child could be
 312     * for example an array dereference and that should be lowered independently
 313     * of the parent.
 314     */
 315    if (parent->as_dereference())
 316       return INDEPENDENT_OPERATION;
 317
 318    /* The precision of texture sampling depend on the precision of the sampler.
 319     * The rest of the arguments don’t matter so we can treat it as an
 320     * independent operation.
 321     */
 322    if (parent->as_texture())
 323       return INDEPENDENT_OPERATION;
 324
 325    return COMBINED_OPERATION;
 326 }
 327
 328 ir_visitor_status
 329 find_lowerable_rvalues_visitor::visit(ir_constant *ir)
 330 {
 331    stack_enter(ir, this);
 332
 333    if (!can_lower_type(options, ir->type))
 334       stack.back().state = CANT_LOWER;
 335
 336    stack_leave(ir, this);
 337
 338    return visit_continue;
 339 }
 340
 341 ir_visitor_status
 342 find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
 343 {
 344    stack_enter(ir, this);
 345
 346    if (stack.back().state == UNKNOWN)
 347       stack.back().state = handle_precision(ir->type, ir->precision());
 348
 349    stack_leave(ir, this);
 350
 351    return visit_continue;
 352 }
 353
 354 ir_visitor_status
 355 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
 356 {
 357    ir_hierarchical_visitor::visit_enter(ir);
 358
 359    if (stack.back().state == UNKNOWN)
 360       stack.back().state = handle_precision(ir->type, ir->precision());
 361
 362    return visit_continue;
 363 }
 364
 365 ir_visitor_status
 366 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
 367 {
 368    ir_hierarchical_visitor::visit_enter(ir);
 369
 370    if (stack.back().state == UNKNOWN)
 371       stack.back().state = handle_precision(ir->type, ir->precision());
 372
 373    return visit_continue;
 374 }
 375
 376 ir_visitor_status
 377 find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
 378 {
 379    ir_hierarchical_visitor::visit_enter(ir);
 380
 381    /* The precision of the sample value depends on the precision of the
 382     * sampler.
 383     */
 384    stack.back().state = handle_precision(ir->type,
 385                                          ir->sampler->precision());
 386    return visit_continue;
 387 }
 388
 389 ir_visitor_status
 390 find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
 391 {
 392    ir_hierarchical_visitor::visit_enter(ir);
 393
 394    if (!can_lower_type(options, ir->type))
 395       stack.back().state = CANT_LOWER;
 396
 397    /* Don't lower precision for derivative calculations */
 398    if (!options->LowerPrecisionDerivatives &&
 399        (ir->operation == ir_unop_dFdx ||
 400         ir->operation == ir_unop_dFdx_coarse ||
 401         ir->operation == ir_unop_dFdx_fine ||
 402         ir->operation == ir_unop_dFdy ||
 403         ir->operation == ir_unop_dFdy_coarse ||
 404         ir->operation == ir_unop_dFdy_fine)) {
 405       stack.back().state = CANT_LOWER;
 406    }
 407
 408    return visit_continue;
 409 }
 410
 411 static bool
 412 function_always_returns_mediump_or_lowp(const char *name)
 413 {
 414    return !strcmp(name, "bitCount") ||
 415           !strcmp(name, "findLSB") ||
 416           !strcmp(name, "findMSB") ||
 417           !strcmp(name, "unpackHalf2x16") ||
 418           !strcmp(name, "unpackUnorm4x8") ||
 419           !strcmp(name, "unpackSnorm4x8");
 420 }
 421
 422 static unsigned
 423 handle_call(ir_call *ir, const struct set *lowerable_rvalues)
 424 {
 425    /* The intrinsic call is inside the wrapper imageLoad function that will
 426     * be inlined. We have to handle both of them.
 427     */
 428    if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
 429        (ir->callee->is_builtin() &&
 430         !strcmp(ir->callee_name(), "imageLoad"))) {
 431       ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
 432       ir_variable *resource = param->variable_referenced();
 433
 434       assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
 435       assert(resource->type->without_array()->is_image());
 436
 437       /* GLSL ES 3.20 requires that images have a precision modifier, but if
 438        * you set one, it doesn't do anything, because all intrinsics are
 439        * defined with highp. This seems to be a spec bug.
 440        *
 441        * In theory we could set the return value to mediump if the image
 442        * format has a lower precision. This appears to be the most sensible
 443        * thing to do.
 444        */
 445       const struct util_format_description *desc =
 446          util_format_description(resource->data.image_format);
 447       int i =
 448          util_format_get_first_non_void_channel(resource->data.image_format);
 449       bool mediump;
 450
 451       assert(i >= 0);
 452
 453       if (desc->channel[i].pure_integer ||
 454           desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
 455          mediump = desc->channel[i].size <= 16;
 456       else
 457          mediump = desc->channel[i].size <= 10; /* unorm/snorm */
 458
 459       return mediump ? GLSL_PRECISION_MEDIUM : GLSL_PRECISION_HIGH;
 460    }
 461
 462    /* Return the declared precision for user-defined functions. */
 463    if (!ir->callee->is_builtin())
 464       return ir->callee->return_precision;
 465
 466    /* Handle special calls. */
 467    if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
 468       ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
 469       ir_variable *var = param->variable_referenced();
 470
 471       /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
 472        * be inlined by lower_precision() if we return true here, so that we can
 473        * get to ir_texture later and do proper lowering.
 474        *
 475        * We should lower the type of the return value if the sampler type
 476        * uses lower precision. The function parameters don't matter.
 477        */
 478       if (var && var->type->without_array()->is_sampler()) {
 479          /* textureSize always returns highp. */
 480          if (!strcmp(ir->callee_name(), "textureSize"))
 481             return GLSL_PRECISION_HIGH;
 482
 483          return var->data.precision;
 484       }
 485    }
 486
 487    if (/* Parameters are always highp: */
 488        !strcmp(ir->callee_name(), "floatBitsToInt") ||
 489        !strcmp(ir->callee_name(), "floatBitsToUint") ||
 490        !strcmp(ir->callee_name(), "intBitsToFloat") ||
 491        !strcmp(ir->callee_name(), "uintBitsToFloat") ||
 492        !strcmp(ir->callee_name(), "bitfieldReverse") ||
 493        !strcmp(ir->callee_name(), "frexp") ||
 494        !strcmp(ir->callee_name(), "ldexp") ||
 495        /* Parameters and outputs are always highp: */
 496        /* TODO: The operations are highp, but carry and borrow outputs are lowp. */
 497        !strcmp(ir->callee_name(), "uaddCarry") ||
 498        !strcmp(ir->callee_name(), "usubBorrow") ||
 499        !strcmp(ir->callee_name(), "imulExtended") ||
 500        !strcmp(ir->callee_name(), "umulExtended") ||
 501        !strcmp(ir->callee_name(), "unpackUnorm2x16") ||
 502        !strcmp(ir->callee_name(), "unpackSnorm2x16") ||
 503        /* Outputs are highp: */
 504        !strcmp(ir->callee_name(), "packUnorm2x16") ||
 505        !strcmp(ir->callee_name(), "packSnorm2x16") ||
 506        /* Parameters are mediump and outputs are highp. The parameters should
 507         * be optimized in NIR, not here, e.g:
 508         * - packHalf2x16 can just be a bitcast from f16vec2 to uint32
 509         * - Other opcodes don't have to convert parameters to highp if the hw
 510         *   has f16 versions. Optimize in NIR accordingly.
 511         */
 512        !strcmp(ir->callee_name(), "packHalf2x16") ||
 513        !strcmp(ir->callee_name(), "packUnorm4x8") ||
 514        !strcmp(ir->callee_name(), "packSnorm4x8") ||
 515        /* Atomic functions are not lowered. */
 516        strstr(ir->callee_name(), "atomic") == ir->callee_name())
 517       return GLSL_PRECISION_HIGH;
 518
 519    assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
 520
 521    /* Number of parameters to check if they are lowerable. */
 522    unsigned check_parameters = ir->actual_parameters.length();
 523
 524    /* Interpolation functions only consider the precision of the interpolant. */
 525    /* Bitfield functions ignore the precision of "offset" and "bits". */
 526    if (!strcmp(ir->callee_name(), "interpolateAtOffset") ||
 527        !strcmp(ir->callee_name(), "interpolateAtSample") ||
 528        !strcmp(ir->callee_name(), "bitfieldExtract")) {
 529       check_parameters = 1;
 530    } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) {
 531       check_parameters = 2;
 532    } if (function_always_returns_mediump_or_lowp(ir->callee_name())) {
 533       /* These only lower the return value. Parameters keep their precision,
 534        * which is preserved in map_builtin.
 535        */
 536       check_parameters = 0;
 537    }
 538
 539    /* If the call is to a builtin, then the function won’t have a return
 540     * precision and we should determine it from the precision of the arguments.
 541     */
 542    foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
 543       if (!check_parameters)
 544          break;
 545
 546       if (!param->as_constant() &&
 547           _mesa_set_search(lowerable_rvalues, param) == NULL)
 548          return GLSL_PRECISION_HIGH;
 549
 550       --check_parameters;
 551    }
 552
 553    return GLSL_PRECISION_MEDIUM;
 554 }
 555
 556 ir_visitor_status
 557 find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
 558 {
 559    ir_hierarchical_visitor::visit_leave(ir);
 560
 561    /* Special case for handling temporary variables generated by the compiler
 562     * for function calls. If we assign to one of these using a function call
 563     * that has a lowerable return type then we can assume the temporary
 564     * variable should have a medium precision too.
 565     */
 566
 567    /* Do nothing if the return type is void. */
 568    if (!ir->return_deref)
 569       return visit_continue;
 570
 571    ir_variable *var = ir->return_deref->variable_referenced();
 572
 573    assert(var->data.mode == ir_var_temporary);
 574
 575    unsigned return_precision = handle_call(ir, lowerable_rvalues);
 576
 577    can_lower_state lower_state =
 578       handle_precision(var->type, return_precision);
 579
 580    if (lower_state == SHOULD_LOWER) {
 581       /* There probably shouldn’t be any situations where multiple ir_call
 582        * instructions write to the same temporary?
 583        */
 584       assert(var->data.precision == GLSL_PRECISION_NONE);
 585       var->data.precision = GLSL_PRECISION_MEDIUM;
 586    } else {
 587       var->data.precision = GLSL_PRECISION_HIGH;
 588    }
 589
 590    return visit_continue;
 591 }
 592
 593 ir_visitor_status
 594 find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
 595 {
 596    ir_hierarchical_visitor::visit_leave(ir);
 597
 598    /* Special case for handling temporary variables generated by the compiler.
 599     * If we assign to one of these using a lowered precision then we can assume
 600     * the temporary variable should have a medium precision too.
 601     */
 602    ir_variable *var = ir->lhs->variable_referenced();
 603
 604    if (var->data.mode == ir_var_temporary) {
 605       if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
 606          /* Only override the precision if this is the first assignment. For
 607           * temporaries such as the ones generated for the ?: operator there
 608           * can be multiple assignments with different precisions. This way we
 609           * get the highest precision of all of the assignments.
 610           */
 611          if (var->data.precision == GLSL_PRECISION_NONE)
 612             var->data.precision = GLSL_PRECISION_MEDIUM;
 613       } else if (!ir->rhs->as_constant()) {
 614          var->data.precision = GLSL_PRECISION_HIGH;
 615       }
 616    }
 617
 618    return visit_continue;
 619 }
 620
 621 void
 622 find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
 623                        exec_list *instructions,
 624                        struct set *result)
 625 {
 626    find_lowerable_rvalues_visitor v(result, options);
 627
 628    visit_list_elements(&v, instructions);
 629
 630    assert(v.stack.empty());
 631 }
 632
 633 static const glsl_type *
 634 convert_type(bool up, const glsl_type *type)
 635 {
 636    if (type->is_array()) {
 637       return glsl_type::get_array_instance(convert_type(up, type->fields.array),
 638                                            type->array_size(),
 639                                            type->explicit_stride);
 640    }
 641
 642    glsl_base_type new_base_type;
 643
 644    if (up) {
 645       switch (type->base_type) {
 646       case GLSL_TYPE_FLOAT16:
 647          new_base_type = GLSL_TYPE_FLOAT;
 648          break;
 649       case GLSL_TYPE_INT16:
 650          new_base_type = GLSL_TYPE_INT;
 651          break;
 652       case GLSL_TYPE_UINT16:
 653          new_base_type = GLSL_TYPE_UINT;
 654          break;
 655       default:
 656          unreachable("invalid type");
 657          return NULL;
 658       }
 659    } else {
 660       switch (type->base_type) {
 661       case GLSL_TYPE_FLOAT:
 662          new_base_type = GLSL_TYPE_FLOAT16;
 663          break;
 664       case GLSL_TYPE_INT:
 665          new_base_type = GLSL_TYPE_INT16;
 666          break;
 667       case GLSL_TYPE_UINT:
 668          new_base_type = GLSL_TYPE_UINT16;
 669          break;
 670       default:
 671          unreachable("invalid type");
 672          return NULL;
 673       }
 674    }
 675
 676    return glsl_type::get_instance(new_base_type,
 677                                   type->vector_elements,
 678                                   type->matrix_columns,
 679                                   type->explicit_stride,
 680                                   type->interface_row_major);
 681 }
 682
 683 static const glsl_type *
 684 lower_glsl_type(const glsl_type *type)
 685 {
 686    return convert_type(false, type);
 687 }
 688
 689 static ir_rvalue *
 690 convert_precision(bool up, ir_rvalue *ir)
 691 {
 692    unsigned op;
 693
 694    if (up) {
 695       switch (ir->type->base_type) {
 696       case GLSL_TYPE_FLOAT16:
 697          op = ir_unop_f162f;
 698          break;
 699       case GLSL_TYPE_INT16:
 700          op = ir_unop_i2i;
 701          break;
 702       case GLSL_TYPE_UINT16:
 703          op = ir_unop_u2u;
 704          break;
 705       default:
 706          unreachable("invalid type");
 707          return NULL;
 708       }
 709    } else {
 710       switch (ir->type->base_type) {
 711       case GLSL_TYPE_FLOAT:
 712          op = ir_unop_f2fmp;
 713          break;
 714       case GLSL_TYPE_INT:
 715          op = ir_unop_i2imp;
 716          break;
 717       case GLSL_TYPE_UINT:
 718          op = ir_unop_u2ump;
 719          break;
 720       default:
 721          unreachable("invalid type");
 722          return NULL;
 723       }
 724    }
 725
 726    const glsl_type *desired_type = convert_type(up, ir->type);
 727    void *mem_ctx = ralloc_parent(ir);
 728    return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
 729 }
 730
 731 void
 732 lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
 733 {
 734    ir_rvalue *ir = *rvalue;
 735
 736    if (ir == NULL)
 737       return;
 738
 739    if (ir->as_dereference()) {
 740       if (!ir->type->is_boolean())
 741          *rvalue = convert_precision(false, ir);
 742    } else if (ir->type->is_32bit()) {
 743       ir->type = lower_glsl_type(ir->type);
 744
 745       ir_constant *const_ir = ir->as_constant();
 746
 747       if (const_ir) {
 748          ir_constant_data value;
 749
 750          if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
 751             for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
 752                value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
 753          } else if (ir->type->base_type == GLSL_TYPE_INT16) {
 754             for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
 755                value.i16[i] = const_ir->value.i[i];
 756          } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
 757             for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
 758                value.u16[i] = const_ir->value.u[i];
 759          } else {
 760             unreachable("invalid type");
 761          }
 762
 763          const_ir->value = value;
 764       }
 765    }
 766 }
 767
 768 ir_visitor_status
 769 lower_precision_visitor::visit_enter(ir_dereference_record *ir)
 770 {
 771    /* We don’t want to lower the variable */
 772    return visit_continue_with_parent;
 773 }
 774
 775 ir_visitor_status
 776 lower_precision_visitor::visit_enter(ir_dereference_array *ir)
 777 {
 778    /* We don’t want to convert the array index or the variable. If the array
 779     * index itself is lowerable that will be handled separately.
 780     */
 781    return visit_continue_with_parent;
 782 }
 783
 784 ir_visitor_status
 785 lower_precision_visitor::visit_enter(ir_call *ir)
 786 {
 787    /* We don’t want to convert the arguments. These will be handled separately.
 788     */
 789    return visit_continue_with_parent;
 790 }
 791
 792 ir_visitor_status
 793 lower_precision_visitor::visit_enter(ir_texture *ir)
 794 {
 795    /* We don’t want to convert the arguments. These will be handled separately.
 796     */
 797    return visit_continue_with_parent;
 798 }
 799
 800 ir_visitor_status
 801 lower_precision_visitor::visit_leave(ir_expression *ir)
 802 {
 803    ir_rvalue_visitor::visit_leave(ir);
 804
 805    /* If the expression is a conversion operation to or from bool then fix the
 806     * operation.
 807     */
 808    switch (ir->operation) {
 809    case ir_unop_b2f:
 810       ir->operation = ir_unop_b2f16;
 811       break;
 812    case ir_unop_f2b:
 813       ir->operation = ir_unop_f162b;
 814       break;
 815    case ir_unop_b2i:
 816    case ir_unop_i2b:
 817       /* Nothing to do - they both support int16. */
 818       break;
 819    default:
 820       break;
 821    }
 822
 823    return visit_continue;
 824 }
 825
 826 void
 827 find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
 828 {
 829    /* Checking the precision of rvalue can be lowered first throughout
 830     * find_lowerable_rvalues_visitor.
 831     * Once it found the precision of rvalue can be lowered, then we can
 832     * add conversion f2fmp, etc. through lower_precision_visitor.
 833     */
 834    if (*rvalue == NULL)
 835       return;
 836
 837    struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);
 838
 839    if (!entry)
 840       return;
 841
 842    _mesa_set_remove(lowerable_rvalues, entry);
 843
 844    /* If the entire expression is just a variable dereference then trying to
 845     * lower it will just directly add pointless to and from conversions without
 846     * any actual operation in-between. Although these will eventually get
 847     * optimised out, avoiding generating them here also avoids breaking inout
 848     * parameters to functions.
 849     */
 850    if ((*rvalue)->as_dereference())
 851       return;
 852
 853    lower_precision_visitor v;
 854
 855    (*rvalue)->accept(&v);
 856    v.handle_rvalue(rvalue);
 857
 858    /* We don’t need to add the final conversion if the final type has been
 859     * converted to bool
 860     */
 861    if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) {
 862       *rvalue = convert_precision(true, *rvalue);
 863    }
 864 }
 865
 866 ir_visitor_status
 867 find_precision_visitor::visit_enter(ir_call *ir)
 868 {
 869    ir_rvalue_enter_visitor::visit_enter(ir);
 870
 871    ir_variable *return_var =
 872       ir->return_deref ? ir->return_deref->variable_referenced() : NULL;
 873
 874    /* Don't do anything for image_load here. We have only changed the return
 875     * value to mediump/lowp, so that following instructions can use reduced
 876     * precision.
 877     *
 878     * The return value type of the intrinsic itself isn't changed here, but
 879     * can be changed in NIR if all users use the *2*mp opcode.
 880     */
 881    if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
 882       return visit_continue;
 883
 884    /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
 885     * overrode the precision of the temporary return variable, then we can
 886     * replace the builtin implementation with a lowered version.
 887     */
 888
 889    if (!ir->callee->is_builtin() ||
 890        ir->callee->is_intrinsic() ||
 891        return_var == NULL ||
 892        (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
 893         return_var->data.precision != GLSL_PRECISION_LOW))
 894       return visit_continue;
 895
 896    ir->callee = map_builtin(ir->callee);
 897    ir->generate_inline(ir);
 898    ir->remove();
 899
 900    return visit_continue_with_parent;
 901 }
 902
 903 ir_function_signature *
 904 find_precision_visitor::map_builtin(ir_function_signature *sig)
 905 {
 906    if (lowered_builtins == NULL) {
 907       lowered_builtins = _mesa_pointer_hash_table_create(NULL);
 908       clone_ht =_mesa_pointer_hash_table_create(NULL);
 909       lowered_builtin_mem_ctx = ralloc_context(NULL);
 910    } else {
 911       struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
 912       if (entry)
 913          return (ir_function_signature *) entry->data;
 914    }
 915
 916    ir_function_signature *lowered_sig =
 917       sig->clone(lowered_builtin_mem_ctx, clone_ht);
 918
 919    /* Functions that always return mediump or lowp should keep their
 920     * parameters intact, because they can be highp. NIR can lower
 921     * the up-conversion for parameters if needed.
 922     */
 923    if (!function_always_returns_mediump_or_lowp(sig->function_name())) {
 924       foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
 925          param->data.precision = GLSL_PRECISION_MEDIUM;
 926       }
 927    }
 928
 929    lower_precision(options, &lowered_sig->body);
 930
 931    _mesa_hash_table_clear(clone_ht, NULL);
 932
 933    _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);
 934
 935    return lowered_sig;
 936 }
 937
/**
 * Construct the visitor with an empty pointer set of lowerable rvalues.
 *
 * The builtin-lowering state (lowered_builtins, clone_ht and
 * lowered_builtin_mem_ctx) starts out NULL; map_builtin() creates it on
 * first use.  \p options is stored as given.
 */
find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
   : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
     lowered_builtins(NULL),
     clone_ht(NULL),
     lowered_builtin_mem_ctx(NULL),
     options(options)
{
}
 946
 947 find_precision_visitor::~find_precision_visitor()
 948 {
 949    _mesa_set_destroy(lowerable_rvalues, NULL);
 950
 951    if (lowered_builtins) {
 952       _mesa_hash_table_destroy(lowered_builtins, NULL);
 953       _mesa_hash_table_destroy(clone_ht, NULL);
 954       ralloc_free(lowered_builtin_mem_ctx);
 955    }
 956 }
 957
 958 /* Lowering opcodes to 16 bits is not enough for programs with control flow
 959  * (and the ?: operator, which is represented by if-then-else in the IR),
 960  * because temporary variables, which are used for passing values between
 961  * code blocks, are not lowered, resulting in 32-bit phis in NIR.
 962  *
 963  * First change the variable types to 16 bits, then change all ir_dereference
 964  * types to 16 bits.
 965  */
 966 class lower_variables_visitor : public ir_rvalue_enter_visitor {
 967 public:
 968    lower_variables_visitor(const struct gl_shader_compiler_options *options)
 969       : options(options) {
 970       lower_vars = _mesa_pointer_set_create(NULL);
 971    }
 972
 973    virtual ~lower_variables_visitor()
 974    {
 975       _mesa_set_destroy(lower_vars, NULL);
 976    }
 977
 978    virtual ir_visitor_status visit(ir_variable *var);
 979    virtual ir_visitor_status visit_enter(ir_assignment *ir);
 980    virtual ir_visitor_status visit_enter(ir_return *ir);
 981    virtual ir_visitor_status visit_enter(ir_call *ir);
 982    virtual void handle_rvalue(ir_rvalue **rvalue);
 983
 984    void fix_types_in_deref_chain(ir_dereference *ir);
 985    void convert_split_assignment(ir_dereference *lhs, ir_rvalue *rhs,
 986                                  bool insert_before);
 987
 988    const struct gl_shader_compiler_options *options;
 989    set *lower_vars;
 990 };
 991
 992 static void
 993 lower_constant(ir_constant *ir)
 994 {
 995    if (ir->type->is_array()) {
 996       for (int i = 0; i < ir->type->array_size(); i++)
 997          lower_constant(ir->get_array_element(i));
 998
 999       ir->type = lower_glsl_type(ir->type);
1000       return;
1001    }
1002
1003    ir->type = lower_glsl_type(ir->type);
1004    ir_constant_data value;
1005
1006    if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
1007       for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
1008          value.f16[i] = _mesa_float_to_half(ir->value.f[i]);
1009    } else if (ir->type->base_type == GLSL_TYPE_INT16) {
1010       for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
1011          value.i16[i] = ir->value.i[i];
1012    } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
1013       for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
1014          value.u16[i] = ir->value.u[i];
1015    } else {
1016       unreachable("invalid type");
1017    }
1018
1019    ir->value = value;
1020 }
1021
1022 ir_visitor_status
1023 lower_variables_visitor::visit(ir_variable *var)
1024 {
1025    if ((var->data.mode != ir_var_temporary &&
1026         var->data.mode != ir_var_auto) ||
1027        !var->type->without_array()->is_32bit() ||
1028        (var->data.precision != GLSL_PRECISION_MEDIUM &&
1029         var->data.precision != GLSL_PRECISION_LOW) ||
1030        !can_lower_type(options, var->type))
1031       return visit_continue;
1032
1033    /* Lower constant initializers. */
1034    if (var->constant_value &&
1035        var->type == var->constant_value->type) {
1036       if (!options->LowerPrecisionConstants)
1037          return visit_continue;
1038       var->constant_value =
1039          var->constant_value->clone(ralloc_parent(var), NULL);
1040       lower_constant(var->constant_value);
1041    }
1042
1043    if (var->constant_initializer &&
1044        var->type == var->constant_initializer->type) {
1045       if (!options->LowerPrecisionConstants)
1046          return visit_continue;
1047       var->constant_initializer =
1048          var->constant_initializer->clone(ralloc_parent(var), NULL);
1049       lower_constant(var->constant_initializer);
1050    }
1051
1052    var->type = lower_glsl_type(var->type);
1053    _mesa_set_add(lower_vars, var);
1054
1055    return visit_continue;
1056 }
1057
1058 void
1059 lower_variables_visitor::fix_types_in_deref_chain(ir_dereference *ir)
1060 {
1061    assert(ir->type->without_array()->is_32bit());
1062    assert(_mesa_set_search(lower_vars, ir->variable_referenced()));
1063
1064    /* Fix the type in the dereference node. */
1065    ir->type = lower_glsl_type(ir->type);
1066
1067    /* If it's an array, fix the types in the whole dereference chain. */
1068    for (ir_dereference_array *deref_array = ir->as_dereference_array();
1069         deref_array;
1070         deref_array = deref_array->array->as_dereference_array()) {
1071       assert(deref_array->array->type->without_array()->is_32bit());
1072       deref_array->array->type = lower_glsl_type(deref_array->array->type);
1073    }
1074 }
1075
1076 void
1077 lower_variables_visitor::convert_split_assignment(ir_dereference *lhs,
1078                                                   ir_rvalue *rhs,
1079                                                   bool insert_before)
1080 {
1081    void *mem_ctx = ralloc_parent(lhs);
1082
1083    if (lhs->type->is_array()) {
1084       for (unsigned i = 0; i < lhs->type->length; i++) {
1085          ir_dereference *l, *r;
1086
1087          l = new(mem_ctx) ir_dereference_array(lhs->clone(mem_ctx, NULL),
1088                                                new(mem_ctx) ir_constant(i));
1089          r = new(mem_ctx) ir_dereference_array(rhs->clone(mem_ctx, NULL),
1090                                                new(mem_ctx) ir_constant(i));
1091          convert_split_assignment(l, r, insert_before);
1092       }
1093       return;
1094    }
1095
1096    assert(lhs->type->is_16bit() || lhs->type->is_32bit());
1097    assert(rhs->type->is_16bit() || rhs->type->is_32bit());
1098    assert(lhs->type->is_16bit() != rhs->type->is_16bit());
1099
1100    ir_assignment *assign =
1101       new(mem_ctx) ir_assignment(lhs, convert_precision(lhs->type->is_32bit(), rhs));
1102
1103    if (insert_before)
1104       base_ir->insert_before(assign);
1105    else
1106       base_ir->insert_after(assign);
1107 }
1108
/**
 * Make an assignment type-consistent after variables have been lowered.
 *
 * Two situations are handled:
 *  - Whole-array assignments where exactly one side is a lowered variable
 *    (or the RHS is a 32-bit constant and the LHS is 16-bit) are replaced by
 *    per-element converting assignments; the original assignment is removed.
 *  - For any other assignment to a lowered variable, the LHS dereference
 *    type is lowered and the RHS is made 16-bit, either by fixing its
 *    dereference type, stripping an existing up-conversion, or adding a
 *    down-conversion.
 *
 * Returns visit_continue when the assignment was replaced, otherwise the
 * result of the base-class visit_enter().
 */
ir_visitor_status
lower_variables_visitor::visit_enter(ir_assignment *ir)
{
   ir_dereference *lhs = ir->lhs;
   ir_variable *var = lhs->variable_referenced();
   /* The RHS is only inspected when it is a plain dereference or constant. */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   ir_variable *rhs_var = rhs_deref ? rhs_deref->variable_referenced() : NULL;
   ir_constant *rhs_const = ir->rhs->as_constant();

   /* Legalize array assignments between lowered and non-lowered variables.
    *
    * The condition holds for an array LHS when the RHS is either a variable
    * whose 16-bit-ness differs from the LHS variable's, or a 32-bit constant
    * assigned to a 16-bit LHS variable.
    */
   if (lhs->type->is_array() &&
       (rhs_var || rhs_const) &&
       (!rhs_var ||
        (var &&
         var->type->without_array()->is_16bit() !=
         rhs_var->type->without_array()->is_16bit())) &&
       (!rhs_const ||
        (var &&
         var->type->without_array()->is_16bit() &&
         rhs_const->type->without_array()->is_32bit()))) {
      assert(ir->rhs->type->is_array());

      /* Fix array assignments from lowered to non-lowered. */
      if (rhs_var && _mesa_set_search(lower_vars, rhs_var)) {
         fix_types_in_deref_chain(rhs_deref);
         /* Convert to 32 bits for LHS. */
         convert_split_assignment(lhs, rhs_deref, true);
         /* The per-element assignments inserted above replace this one. */
         ir->remove();
         return visit_continue;
      }

      /* Fix array assignments from non-lowered to lowered. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          ir->rhs->type->without_array()->is_32bit()) {
         fix_types_in_deref_chain(lhs);
         /* Convert to 16 bits for LHS. */
         convert_split_assignment(lhs, ir->rhs, true);
         /* The per-element assignments inserted above replace this one. */
         ir->remove();
         return visit_continue;
      }
   }

   /* Fix assignment types. */
   if (var &&
       _mesa_set_search(lower_vars, var)) {
      /* Fix the LHS type. */
      if (lhs->type->without_array()->is_32bit())
         fix_types_in_deref_chain(lhs);

      /* Fix the RHS type if it's a lowered variable. */
      if (rhs_var &&
          _mesa_set_search(lower_vars, rhs_var) &&
          rhs_deref->type->without_array()->is_32bit())
         fix_types_in_deref_chain(rhs_deref);

      /* Fix the RHS type if it's a non-array expression.
       * This is checked after the dereference fix above, so an RHS that was
       * just lowered is no longer 32-bit here.
       */
      if (ir->rhs->type->is_32bit()) {
         ir_expression *expr = ir->rhs->as_expression();

         /* Convert the RHS to the LHS type. */
         if (expr &&
             (expr->operation == ir_unop_f162f ||
              expr->operation == ir_unop_i2i ||
              expr->operation == ir_unop_u2u) &&
             expr->operands[0]->type->is_16bit()) {
            /* If there is an "up" conversion, just remove it.
             * This is optional. We could as well execute the else statement and
             * let NIR eliminate the up+down conversions.
             */
            ir->rhs = expr->operands[0];
         } else {
            /* Add a "down" conversion operation to fix the type of RHS. */
            ir->rhs = convert_precision(false, ir->rhs);
         }
      }
   }

   /* Continue with the generic rvalue handling of the (possibly rewritten)
    * assignment.
    */
   return ir_rvalue_enter_visitor::visit_enter(ir);
}
1189
1190 ir_visitor_status
1191 lower_variables_visitor::visit_enter(ir_return *ir)
1192 {
1193    void *mem_ctx = ralloc_parent(ir);
1194
1195    ir_dereference *deref = ir->value ? ir->value->as_dereference() : NULL;
1196    if (deref) {
1197       ir_variable *var = deref->variable_referenced();
1198
1199       /* Fix the type of the return value. */
1200       if (var &&
1201           _mesa_set_search(lower_vars, var) &&
1202           deref->type->without_array()->is_32bit()) {
1203          /* Create a 32-bit temporary variable. */
1204          ir_variable *new_var =
1205             new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
1206          base_ir->insert_before(new_var);
1207
1208          /* Fix types in dereferences. */
1209          fix_types_in_deref_chain(deref);
1210
1211          /* Convert to 32 bits for the return value. */
1212          convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
1213                                   deref, true);
1214          ir->value = new(mem_ctx) ir_dereference_variable(new_var);
1215       }
1216    }
1217
1218    return ir_rvalue_enter_visitor::visit_enter(ir);
1219 }
1220
1221 void lower_variables_visitor::handle_rvalue(ir_rvalue **rvalue)
1222 {
1223    ir_rvalue *ir = *rvalue;
1224
1225    if (in_assignee || ir == NULL)
1226       return;
1227
1228    ir_expression *expr = ir->as_expression();
1229    ir_dereference *expr_op0_deref = expr ? expr->operands[0]->as_dereference() : NULL;
1230
1231    /* Remove f2fmp(float16). Same for int16 and uint16. */
1232    if (expr &&
1233        expr_op0_deref &&
1234        (expr->operation == ir_unop_f2fmp ||
1235         expr->operation == ir_unop_i2imp ||
1236         expr->operation == ir_unop_u2ump ||
1237         expr->operation == ir_unop_f2f16 ||
1238         expr->operation == ir_unop_i2i ||
1239         expr->operation == ir_unop_u2u) &&
1240        expr->type->without_array()->is_16bit() &&
1241        expr_op0_deref->type->without_array()->is_32bit() &&
1242        expr_op0_deref->variable_referenced() &&
1243        _mesa_set_search(lower_vars, expr_op0_deref->variable_referenced())) {
1244       fix_types_in_deref_chain(expr_op0_deref);
1245
1246       /* Remove f2fmp/i2imp/u2ump. */
1247       *rvalue = expr_op0_deref;
1248       return;
1249    }
1250
1251    ir_dereference *deref = ir->as_dereference();
1252
1253    if (deref) {
1254       ir_variable *var = deref->variable_referenced();
1255
1256       /* var can be NULL if we are dereferencing ir_constant. */
1257       if (var &&
1258           _mesa_set_search(lower_vars, var) &&
1259           deref->type->without_array()->is_32bit()) {
1260          void *mem_ctx = ralloc_parent(ir);
1261
1262          /* Create a 32-bit temporary variable. */
1263          ir_variable *new_var =
1264             new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
1265          base_ir->insert_before(new_var);
1266
1267          /* Fix types in dereferences. */
1268          fix_types_in_deref_chain(deref);
1269
1270          /* Convert to 32 bits for the rvalue. */
1271          convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
1272                                   deref, true);
1273          *rvalue = new(mem_ctx) ir_dereference_variable(new_var);
1274       }
1275    }
1276 }
1277
/**
 * Keep calls type-correct when lowered variables are used as arguments or as
 * the return destination of a callee that still uses 32-bit types.
 *
 * Each affected actual parameter is replaced by a new 32-bit temporary, with
 * converting copies emitted before the call (in/inout) and/or after it
 * (out/inout).  A lowered return variable is likewise replaced by a 32-bit
 * temporary that is converted down after the call.
 */
ir_visitor_status
lower_variables_visitor::visit_enter(ir_call *ir)
{
   void *mem_ctx = ralloc_parent(ir);

   /* We can't pass 16-bit variables as 32-bit inout/out parameters. */
   /* Walk the formal and actual parameter lists in lockstep. */
   foreach_two_lists(formal_node, &ir->callee->parameters,
                     actual_node, &ir->actual_parameters) {
      ir_dereference *param_deref =
         ((ir_rvalue *)actual_node)->as_dereference();
      ir_variable *param = (ir_variable *)formal_node;

      /* Only actual parameters that are dereferences are handled here. */
      if (!param_deref)
            continue;

      ir_variable *var = param_deref->variable_referenced();

      /* var can be NULL if we are dereferencing ir_constant. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          param->type->without_array()->is_32bit()) {
         fix_types_in_deref_chain(param_deref);

         /* Create a 32-bit temporary variable for the parameter. */
         ir_variable *new_var =
            new(mem_ctx) ir_variable(param->type, "lowerp", ir_var_temporary);
         base_ir->insert_before(new_var);

         /* Replace the parameter. */
         actual_node->replace_with(new(mem_ctx) ir_dereference_variable(new_var));

         /* NOTE(review): formal parameters whose mode is none of in, out or
          * inout get no copy in either direction below - confirm no other
          * mode can reach this point.
          */
         if (param->data.mode == ir_var_function_in ||
             param->data.mode == ir_var_function_inout) {
            /* Convert to 32 bits for passing in. */
            convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
                                     param_deref->clone(mem_ctx, NULL), true);
         }
         if (param->data.mode == ir_var_function_out ||
             param->data.mode == ir_var_function_inout) {
            /* Convert to 16 bits after returning. */
            /* The original dereference, now detached from the call, is reused
             * as the destination.
             */
            convert_split_assignment(param_deref,
                                     new(mem_ctx) ir_dereference_variable(new_var),
                                     false);
         }
      }
   }

   /* Fix the type of return value dereferences. */
   ir_dereference_variable *ret_deref = ir->return_deref;
   ir_variable *ret_var = ret_deref ? ret_deref->variable_referenced() : NULL;

   if (ret_var &&
       _mesa_set_search(lower_vars, ret_var) &&
       ret_deref->type->without_array()->is_32bit()) {
      /* Create a 32-bit temporary variable. */
      ir_variable *new_var =
         new(mem_ctx) ir_variable(ir->callee->return_type, "lowerp",
                                  ir_var_temporary);
      base_ir->insert_before(new_var);

      /* Replace the return variable. */
      ret_deref->var = new_var;

      /* Convert to 16 bits after returning. */
      convert_split_assignment(new(mem_ctx) ir_dereference_variable(ret_var),
                               new(mem_ctx) ir_dereference_variable(new_var),
                               false);
   }

   /* Continue with the generic rvalue handling of the call. */
   return ir_rvalue_enter_visitor::visit_enter(ir);
}
1349
1350 }
1351
1352 void
1353 lower_precision(const struct gl_shader_compiler_options *options,
1354                 exec_list *instructions)
1355 {
1356    find_precision_visitor v(options);
1357    find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);
1358    visit_list_elements(&v, instructions);
1359
1360    lower_variables_visitor vars(options);
1361    visit_list_elements(&vars, instructions);
1362 }