src/compiler/glsl/lower_precision.cpp

   1 /*
   2  * Copyright © 2019 Google, Inc
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file lower_precision.cpp
  26  */
  27
#include "main/macros.h"
#include "main/mtypes.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"
#include "util/half_float.h"
#include "util/set.h"
#include "util/hash_table.h"
#include <string.h>
#include <vector>
  39
  40 namespace {
  41
  42 class find_precision_visitor : public ir_rvalue_enter_visitor {
  43 public:
  44    find_precision_visitor(const struct gl_shader_compiler_options *options);
  45    ~find_precision_visitor();
  46
  47    virtual void handle_rvalue(ir_rvalue **rvalue);
  48    virtual ir_visitor_status visit_enter(ir_call *ir);
  49
  50    ir_function_signature *map_builtin(ir_function_signature *sig);
  51
  52    /* Set of rvalues that can be lowered. This will be filled in by
  53     * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
  54     * will be added to this set.
  55     */
  56    struct set *lowerable_rvalues;
  57
  58    /**
  59     * A mapping of builtin signature functions to lowered versions. This is
  60     * filled in lazily when a lowered version is needed.
  61     */
  62    struct hash_table *lowered_builtins;
  63    /**
  64     * A temporary hash table only used in order to clone functions.
  65     */
  66    struct hash_table *clone_ht;
  67
  68    void *lowered_builtin_mem_ctx;
  69
  70    const struct gl_shader_compiler_options *options;
  71 };
  72
  73 class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
  74 public:
  75    enum can_lower_state {
  76       UNKNOWN,
  77       CANT_LOWER,
  78       SHOULD_LOWER,
  79    };
  80
  81    enum parent_relation {
  82       /* The parent performs a further operation involving the result from the
  83        * child and can be lowered along with it.
  84        */
  85       COMBINED_OPERATION,
  86       /* The parent instruction’s operation is independent of the child type so
  87        * the child should be lowered separately.
  88        */
  89       INDEPENDENT_OPERATION,
  90    };
  91
  92    struct stack_entry {
  93       ir_instruction *instr;
  94       enum can_lower_state state;
  95       /* List of child rvalues that can be lowered. When this stack entry is
  96        * popped, if this node itself can’t be lowered than all of the children
  97        * are root nodes to lower so we will add them to lowerable_rvalues.
  98        * Otherwise if this node can also be lowered then we won’t add the
  99        * children because we only want to add the topmost lowerable nodes to
 100        * lowerable_rvalues and the children will be lowered as part of lowering
 101        * this node.
 102        */
 103       std::vector<ir_instruction *> lowerable_children;
 104    };
 105
 106    find_lowerable_rvalues_visitor(struct set *result,
 107                                   const struct gl_shader_compiler_options *options);
 108
 109    static void stack_enter(class ir_instruction *ir, void *data);
 110    static void stack_leave(class ir_instruction *ir, void *data);
 111
 112    virtual ir_visitor_status visit(ir_constant *ir);
 113    virtual ir_visitor_status visit(ir_dereference_variable *ir);
 114
 115    virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
 116    virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
 117    virtual ir_visitor_status visit_enter(ir_texture *ir);
 118    virtual ir_visitor_status visit_enter(ir_expression *ir);
 119
 120    virtual ir_visitor_status visit_leave(ir_assignment *ir);
 121    virtual ir_visitor_status visit_leave(ir_call *ir);
 122
 123    can_lower_state handle_precision(const glsl_type *type,
 124                                     int precision) const;
 125
 126    static parent_relation get_parent_relation(ir_instruction *parent,
 127                                               ir_instruction *child);
 128
 129    std::vector<stack_entry> stack;
 130    struct set *lowerable_rvalues;
 131    const struct gl_shader_compiler_options *options;
 132
 133    void pop_stack_entry();
 134    void add_lowerable_children(const stack_entry &entry);
 135 };
 136
 137 class lower_precision_visitor : public ir_rvalue_visitor {
 138 public:
 139    virtual void handle_rvalue(ir_rvalue **rvalue);
 140    virtual ir_visitor_status visit_enter(ir_dereference_array *);
 141    virtual ir_visitor_status visit_enter(ir_dereference_record *);
 142    virtual ir_visitor_status visit_enter(ir_call *ir);
 143    virtual ir_visitor_status visit_enter(ir_texture *ir);
 144    virtual ir_visitor_status visit_leave(ir_expression *);
 145 };
 146
 147 static bool
 148 can_lower_type(const struct gl_shader_compiler_options *options,
 149                const glsl_type *type)
 150 {
 151    /* Don’t lower any expressions involving non-float types except bool and
 152     * texture samplers. This will rule out operations that change the type such
 153     * as conversion to ints. Instead it will end up lowering the arguments
 154     * instead and adding a final conversion to float32. We want to handle
 155     * boolean types so that it will do comparisons as 16-bit.
 156     */
 157
 158    switch (type->base_type) {
 159    /* TODO: should we do anything for these two with regard to Int16 vs FP16
 160     * support?
 161     */
 162    case GLSL_TYPE_BOOL:
 163    case GLSL_TYPE_SAMPLER:
 164    case GLSL_TYPE_IMAGE:
 165       return true;
 166
 167    case GLSL_TYPE_FLOAT:
 168       return options->LowerPrecisionFloat16;
 169
 170    case GLSL_TYPE_UINT:
 171    case GLSL_TYPE_INT:
 172       return options->LowerPrecisionInt16;
 173
 174    default:
 175       return false;
 176    }
 177 }
 178
 179 find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
 180                                  const struct gl_shader_compiler_options *opts)
 181 {
 182    lowerable_rvalues = res;
 183    options = opts;
 184    callback_enter = stack_enter;
 185    callback_leave = stack_leave;
 186    data_enter = this;
 187    data_leave = this;
 188 }
 189
 190 void
 191 find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
 192                                             void *data)
 193 {
 194    find_lowerable_rvalues_visitor *state =
 195       (find_lowerable_rvalues_visitor *) data;
 196
 197    /* Add a new stack entry for this instruction */
 198    stack_entry entry;
 199
 200    entry.instr = ir;
 201    entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;
 202
 203    state->stack.push_back(entry);
 204 }
 205
 206 void
 207 find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
 208 {
 209    /* We can’t lower this node so if there were any pending children then they
 210     * are all root lowerable nodes and we should add them to the set.
 211     */
 212    for (auto &it : entry.lowerable_children)
 213       _mesa_set_add(lowerable_rvalues, it);
 214 }
 215
 216 void
 217 find_lowerable_rvalues_visitor::pop_stack_entry()
 218 {
 219    const stack_entry &entry = stack.back();
 220
 221    if (stack.size() >= 2) {
 222       /* Combine this state into the parent state, unless the parent operation
 223        * doesn’t have any relation to the child operations
 224        */
 225       stack_entry &parent = stack.end()[-2];
 226       parent_relation rel = get_parent_relation(parent.instr, entry.instr);
 227
 228       if (rel == COMBINED_OPERATION) {
 229          switch (entry.state) {
 230          case CANT_LOWER:
 231             parent.state = CANT_LOWER;
 232             break;
 233          case SHOULD_LOWER:
 234             if (parent.state == UNKNOWN)
 235                parent.state = SHOULD_LOWER;
 236             break;
 237          case UNKNOWN:
 238             break;
 239          }
 240       }
 241    }
 242
 243    if (entry.state == SHOULD_LOWER) {
 244       ir_rvalue *rv = entry.instr->as_rvalue();
 245
 246       if (rv == NULL) {
 247          add_lowerable_children(entry);
 248       } else if (stack.size() >= 2) {
 249          stack_entry &parent = stack.end()[-2];
 250
 251          switch (get_parent_relation(parent.instr, rv)) {
 252          case COMBINED_OPERATION:
 253             /* We only want to add the toplevel lowerable instructions to the
 254              * lowerable set. Therefore if there is a parent then instead of
 255              * adding this instruction to the set we will queue depending on
 256              * the result of the parent instruction.
 257              */
 258             parent.lowerable_children.push_back(entry.instr);
 259             break;
 260          case INDEPENDENT_OPERATION:
 261             _mesa_set_add(lowerable_rvalues, rv);
 262             break;
 263          }
 264       } else {
 265          /* This is a toplevel node so add it directly to the lowerable
 266           * set.
 267           */
 268          _mesa_set_add(lowerable_rvalues, rv);
 269       }
 270    } else if (entry.state == CANT_LOWER) {
 271       add_lowerable_children(entry);
 272    }
 273
 274    stack.pop_back();
 275 }
 276
 277 void
 278 find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
 279                                             void *data)
 280 {
 281    find_lowerable_rvalues_visitor *state =
 282       (find_lowerable_rvalues_visitor *) data;
 283
 284    state->pop_stack_entry();
 285 }
 286
 287 enum find_lowerable_rvalues_visitor::can_lower_state
 288 find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
 289                                                  int precision) const
 290 {
 291    if (!can_lower_type(options, type))
 292       return CANT_LOWER;
 293
 294    switch (precision) {
 295    case GLSL_PRECISION_NONE:
 296       return UNKNOWN;
 297    case GLSL_PRECISION_HIGH:
 298       return CANT_LOWER;
 299    case GLSL_PRECISION_MEDIUM:
 300    case GLSL_PRECISION_LOW:
 301       return SHOULD_LOWER;
 302    }
 303
 304    return CANT_LOWER;
 305 }
 306
 307 enum find_lowerable_rvalues_visitor::parent_relation
 308 find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
 309                                                     ir_instruction *child)
 310 {
 311    /* If the parent is a dereference instruction then the only child could be
 312     * for example an array dereference and that should be lowered independently
 313     * of the parent.
 314     */
 315    if (parent->as_dereference())
 316       return INDEPENDENT_OPERATION;
 317
 318    /* The precision of texture sampling depend on the precision of the sampler.
 319     * The rest of the arguments don’t matter so we can treat it as an
 320     * independent operation.
 321     */
 322    if (parent->as_texture())
 323       return INDEPENDENT_OPERATION;
 324
 325    return COMBINED_OPERATION;
 326 }
 327
 328 ir_visitor_status
 329 find_lowerable_rvalues_visitor::visit(ir_constant *ir)
 330 {
 331    stack_enter(ir, this);
 332
 333    if (!can_lower_type(options, ir->type))
 334       stack.back().state = CANT_LOWER;
 335
 336    stack_leave(ir, this);
 337
 338    return visit_continue;
 339 }
 340
 341 ir_visitor_status
 342 find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
 343 {
 344    stack_enter(ir, this);
 345
 346    if (stack.back().state == UNKNOWN)
 347       stack.back().state = handle_precision(ir->type, ir->precision());
 348
 349    stack_leave(ir, this);
 350
 351    return visit_continue;
 352 }
 353
 354 ir_visitor_status
 355 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
 356 {
 357    ir_hierarchical_visitor::visit_enter(ir);
 358
 359    if (stack.back().state == UNKNOWN)
 360       stack.back().state = handle_precision(ir->type, ir->precision());
 361
 362    return visit_continue;
 363 }
 364
 365 ir_visitor_status
 366 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
 367 {
 368    ir_hierarchical_visitor::visit_enter(ir);
 369
 370    if (stack.back().state == UNKNOWN)
 371       stack.back().state = handle_precision(ir->type, ir->precision());
 372
 373    return visit_continue;
 374 }
 375
 376 ir_visitor_status
 377 find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
 378 {
 379    ir_hierarchical_visitor::visit_enter(ir);
 380
 381    /* The precision of the sample value depends on the precision of the
 382     * sampler.
 383     */
 384    stack.back().state = handle_precision(ir->type,
 385                                          ir->sampler->precision());
 386    return visit_continue;
 387 }
 388
 389 ir_visitor_status
 390 find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
 391 {
 392    ir_hierarchical_visitor::visit_enter(ir);
 393
 394    if (!can_lower_type(options, ir->type))
 395       stack.back().state = CANT_LOWER;
 396
 397    /* Don't lower precision for derivative calculations */
 398    if (!options->LowerPrecisionDerivatives &&
 399        (ir->operation == ir_unop_dFdx ||
 400         ir->operation == ir_unop_dFdx_coarse ||
 401         ir->operation == ir_unop_dFdx_fine ||
 402         ir->operation == ir_unop_dFdy ||
 403         ir->operation == ir_unop_dFdy_coarse ||
 404         ir->operation == ir_unop_dFdy_fine)) {
 405       stack.back().state = CANT_LOWER;
 406    }
 407
 408    return visit_continue;
 409 }
 410
 411 static bool
 412 function_always_returns_mediump_or_lowp(const char *name)
 413 {
 414    return !strcmp(name, "bitCount") ||
 415           !strcmp(name, "findLSB") ||
 416           !strcmp(name, "findMSB") ||
 417           !strcmp(name, "unpackHalf2x16") ||
 418           !strcmp(name, "unpackUnorm4x8") ||
 419           !strcmp(name, "unpackSnorm4x8");
 420 }
 421
 422 static bool
 423 is_lowerable_builtin(ir_call *ir,
 424                      const struct set *lowerable_rvalues)
 425 {
 426    /* The intrinsic call is inside the wrapper imageLoad function that will
 427     * be inlined. We have to handle both of them.
 428     */
 429    if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
 430        (ir->callee->is_builtin() &&
 431         !strcmp(ir->callee_name(), "imageLoad"))) {
 432       ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
 433       ir_variable *resource = param->variable_referenced();
 434
 435       assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
 436       assert(resource->type->without_array()->is_image());
 437
 438       /* GLSL ES 3.20 requires that images have a precision modifier, but if
 439        * you set one, it doesn't do anything, because all intrinsics are
 440        * defined with highp. This seems to be a spec bug.
 441        *
 442        * In theory we could set the return value to mediump if the image
 443        * format has a lower precision. This appears to be the most sensible
 444        * thing to do.
 445        */
 446       const struct util_format_description *desc =
 447          util_format_description(resource->data.image_format);
 448       unsigned i =
 449          util_format_get_first_non_void_channel(resource->data.image_format);
 450
 451       if (desc->channel[i].pure_integer ||
 452           desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
 453          return desc->channel[i].size <= 16;
 454       else
 455          return desc->channel[i].size <= 10; /* unorm/snorm */
 456    }
 457
 458    /* Handle special calls. */
 459    if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
 460       ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
 461       ir_variable *var = param->variable_referenced();
 462
 463       /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
 464        * be inlined by lower_precision() if we return true here, so that we can
 465        * get to ir_texture later and do proper lowering.
 466        *
 467        * We should lower the type of the return value if the sampler type
 468        * uses lower precision. The function parameters don't matter.
 469        */
 470       if (var && var->type->without_array()->is_sampler()) {
 471          /* textureSize always returns highp. */
 472          if (!strcmp(ir->callee_name(), "textureSize"))
 473             return false;
 474
 475          return var->data.precision == GLSL_PRECISION_MEDIUM ||
 476                 var->data.precision == GLSL_PRECISION_LOW;
 477       }
 478    }
 479
 480    if (!ir->callee->is_builtin() ||
 481        /* Parameters are always highp: */
 482        !strcmp(ir->callee_name(), "floatBitsToInt") ||
 483        !strcmp(ir->callee_name(), "floatBitsToUint") ||
 484        !strcmp(ir->callee_name(), "intBitsToFloat") ||
 485        !strcmp(ir->callee_name(), "uintBitsToFloat") ||
 486        !strcmp(ir->callee_name(), "bitfieldReverse") ||
 487        !strcmp(ir->callee_name(), "frexp") ||
 488        !strcmp(ir->callee_name(), "ldexp") ||
 489        /* Parameters and outputs are always highp: */
 490        /* TODO: The operations are highp, but carry and borrow outputs are lowp. */
 491        !strcmp(ir->callee_name(), "uaddCarry") ||
 492        !strcmp(ir->callee_name(), "usubBorrow") ||
 493        !strcmp(ir->callee_name(), "imulExtended") ||
 494        !strcmp(ir->callee_name(), "umulExtended") ||
 495        !strcmp(ir->callee_name(), "unpackUnorm2x16") ||
 496        !strcmp(ir->callee_name(), "unpackSnorm2x16") ||
 497        /* Outputs are highp: */
 498        !strcmp(ir->callee_name(), "packUnorm2x16") ||
 499        !strcmp(ir->callee_name(), "packSnorm2x16") ||
 500        /* Parameters are mediump and outputs are highp. The parameters should
 501         * be optimized in NIR, not here, e.g:
 502         * - packHalf2x16 can just be a bitcast from f16vec2 to uint32
 503         * - Other opcodes don't have to convert parameters to highp if the hw
 504         *   has f16 versions. Optimize in NIR accordingly.
 505         */
 506        !strcmp(ir->callee_name(), "packHalf2x16") ||
 507        !strcmp(ir->callee_name(), "packUnorm4x8") ||
 508        !strcmp(ir->callee_name(), "packSnorm4x8"))
 509       return false;
 510
 511    assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
 512
 513    /* Number of parameters to check if they are lowerable. */
 514    unsigned check_parameters = ir->actual_parameters.length();
 515
 516    /* Interpolation functions only consider the precision of the interpolant. */
 517    /* Bitfield functions ignore the precision of "offset" and "bits". */
 518    if (!strcmp(ir->callee_name(), "interpolateAtOffset") ||
 519        !strcmp(ir->callee_name(), "interpolateAtSample") ||
 520        !strcmp(ir->callee_name(), "bitfieldExtract")) {
 521       check_parameters = 1;
 522    } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) {
 523       check_parameters = 2;
 524    } if (function_always_returns_mediump_or_lowp(ir->callee_name())) {
 525       /* These only lower the return value. Parameters keep their precision,
 526        * which is preserved in map_builtin.
 527        */
 528       check_parameters = 0;
 529    }
 530
 531    foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
 532       if (!check_parameters)
 533          break;
 534
 535       if (!param->as_constant() &&
 536           _mesa_set_search(lowerable_rvalues, param) == NULL)
 537          return false;
 538
 539       --check_parameters;
 540    }
 541
 542    return true;
 543 }
 544
 545 ir_visitor_status
 546 find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
 547 {
 548    ir_hierarchical_visitor::visit_leave(ir);
 549
 550    /* Special case for handling temporary variables generated by the compiler
 551     * for function calls. If we assign to one of these using a function call
 552     * that has a lowerable return type then we can assume the temporary
 553     * variable should have a medium precision too.
 554     */
 555
 556    /* Do nothing if the return type is void. */
 557    if (!ir->return_deref)
 558       return visit_continue;
 559
 560    ir_variable *var = ir->return_deref->variable_referenced();
 561
 562    assert(var->data.mode == ir_var_temporary);
 563
 564    unsigned return_precision = ir->callee->return_precision;
 565
 566    /* If the call is to a builtin, then the function won’t have a return
 567     * precision and we should determine it from the precision of the arguments.
 568     */
 569    if (is_lowerable_builtin(ir, lowerable_rvalues))
 570       return_precision = GLSL_PRECISION_MEDIUM;
 571
 572    can_lower_state lower_state =
 573       handle_precision(var->type, return_precision);
 574
 575    if (lower_state == SHOULD_LOWER) {
 576       /* There probably shouldn’t be any situations where multiple ir_call
 577        * instructions write to the same temporary?
 578        */
 579       assert(var->data.precision == GLSL_PRECISION_NONE);
 580       var->data.precision = GLSL_PRECISION_MEDIUM;
 581    } else {
 582       var->data.precision = GLSL_PRECISION_HIGH;
 583    }
 584
 585    return visit_continue;
 586 }
 587
 588 ir_visitor_status
 589 find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
 590 {
 591    ir_hierarchical_visitor::visit_leave(ir);
 592
 593    /* Special case for handling temporary variables generated by the compiler.
 594     * If we assign to one of these using a lowered precision then we can assume
 595     * the temporary variable should have a medium precision too.
 596     */
 597    ir_variable *var = ir->lhs->variable_referenced();
 598
 599    if (var->data.mode == ir_var_temporary) {
 600       if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
 601          /* Only override the precision if this is the first assignment. For
 602           * temporaries such as the ones generated for the ?: operator there
 603           * can be multiple assignments with different precisions. This way we
 604           * get the highest precision of all of the assignments.
 605           */
 606          if (var->data.precision == GLSL_PRECISION_NONE)
 607             var->data.precision = GLSL_PRECISION_MEDIUM;
 608       } else if (!ir->rhs->as_constant()) {
 609          var->data.precision = GLSL_PRECISION_HIGH;
 610       }
 611    }
 612
 613    return visit_continue;
 614 }
 615
 616 void
 617 find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
 618                        exec_list *instructions,
 619                        struct set *result)
 620 {
 621    find_lowerable_rvalues_visitor v(result, options);
 622
 623    visit_list_elements(&v, instructions);
 624
 625    assert(v.stack.empty());
 626 }
 627
 628 static const glsl_type *
 629 lower_glsl_type(const glsl_type *type)
 630 {
 631    glsl_base_type new_base_type;
 632
 633    switch (type->base_type) {
 634    case GLSL_TYPE_FLOAT:
 635       new_base_type = GLSL_TYPE_FLOAT16;
 636       break;
 637    case GLSL_TYPE_INT:
 638       new_base_type = GLSL_TYPE_INT16;
 639       break;
 640    case GLSL_TYPE_UINT:
 641       new_base_type = GLSL_TYPE_UINT16;
 642       break;
 643    default:
 644       unreachable("invalid type");
 645       return NULL;
 646    }
 647
 648    return glsl_type::get_instance(new_base_type,
 649                                   type->vector_elements,
 650                                   type->matrix_columns,
 651                                   type->explicit_stride,
 652                                   type->interface_row_major);
 653 }
 654
 655 static ir_rvalue *
 656 convert_precision(bool up, ir_rvalue *ir)
 657 {
 658    unsigned new_type, op;
 659
 660    if (up) {
 661       switch (ir->type->base_type) {
 662       case GLSL_TYPE_FLOAT16:
 663          new_type = GLSL_TYPE_FLOAT;
 664          op = ir_unop_f162f;
 665          break;
 666       case GLSL_TYPE_INT16:
 667          new_type = GLSL_TYPE_INT;
 668          op = ir_unop_i2i;
 669          break;
 670       case GLSL_TYPE_UINT16:
 671          new_type = GLSL_TYPE_UINT;
 672          op = ir_unop_u2u;
 673          break;
 674       default:
 675          unreachable("invalid type");
 676          return NULL;
 677       }
 678    } else {
 679       switch (ir->type->base_type) {
 680       case GLSL_TYPE_FLOAT:
 681          new_type = GLSL_TYPE_FLOAT16;
 682          op = ir_unop_f2fmp;
 683          break;
 684       case GLSL_TYPE_INT:
 685          new_type = GLSL_TYPE_INT16;
 686          op = ir_unop_i2imp;
 687          break;
 688       case GLSL_TYPE_UINT:
 689          new_type = GLSL_TYPE_UINT16;
 690          op = ir_unop_u2ump;
 691          break;
 692       default:
 693          unreachable("invalid type");
 694          return NULL;
 695       }
 696    }
 697
 698    const glsl_type *desired_type;
 699    desired_type = glsl_type::get_instance(new_type,
 700                              ir->type->vector_elements,
 701                              ir->type->matrix_columns);
 702
 703    void *mem_ctx = ralloc_parent(ir);
 704    return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
 705 }
 706
 707 void
 708 lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
 709 {
 710    ir_rvalue *ir = *rvalue;
 711
 712    if (ir == NULL)
 713       return;
 714
 715    if (ir->as_dereference()) {
 716       if (!ir->type->is_boolean())
 717          *rvalue = convert_precision(false, ir);
 718    } else if (ir->type->is_32bit()) {
 719       ir->type = lower_glsl_type(ir->type);
 720
 721       ir_constant *const_ir = ir->as_constant();
 722
 723       if (const_ir) {
 724          ir_constant_data value;
 725
 726          if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
 727             for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
 728                value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
 729          } else if (ir->type->base_type == GLSL_TYPE_INT16) {
 730             for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
 731                value.i16[i] = const_ir->value.i[i];
 732          } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
 733             for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
 734                value.u16[i] = const_ir->value.u[i];
 735          } else {
 736             unreachable("invalid type");
 737          }
 738
 739          const_ir->value = value;
 740       }
 741    }
 742 }
 743
 744 ir_visitor_status
 745 lower_precision_visitor::visit_enter(ir_dereference_record *ir)
 746 {
 747    /* We don’t want to lower the variable */
 748    return visit_continue_with_parent;
 749 }
 750
 751 ir_visitor_status
 752 lower_precision_visitor::visit_enter(ir_dereference_array *ir)
 753 {
 754    /* We don’t want to convert the array index or the variable. If the array
 755     * index itself is lowerable that will be handled separately.
 756     */
 757    return visit_continue_with_parent;
 758 }
 759
 760 ir_visitor_status
 761 lower_precision_visitor::visit_enter(ir_call *ir)
 762 {
 763    /* We don’t want to convert the arguments. These will be handled separately.
 764     */
 765    return visit_continue_with_parent;
 766 }
 767
 768 ir_visitor_status
 769 lower_precision_visitor::visit_enter(ir_texture *ir)
 770 {
 771    /* We don’t want to convert the arguments. These will be handled separately.
 772     */
 773    return visit_continue_with_parent;
 774 }
 775
 776 ir_visitor_status
 777 lower_precision_visitor::visit_leave(ir_expression *ir)
 778 {
 779    ir_rvalue_visitor::visit_leave(ir);
 780
 781    /* If the expression is a conversion operation to or from bool then fix the
 782     * operation.
 783     */
 784    switch (ir->operation) {
 785    case ir_unop_b2f:
 786       ir->operation = ir_unop_b2f16;
 787       break;
 788    case ir_unop_f2b:
 789       ir->operation = ir_unop_f162b;
 790       break;
 791    case ir_unop_b2i:
 792    case ir_unop_i2b:
 793       /* Nothing to do - they both support int16. */
 794       break;
 795    default:
 796       break;
 797    }
 798
 799    return visit_continue;
 800 }
 801
 802 void
 803 find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
 804 {
 805    /* Checking the precision of rvalue can be lowered first throughout
 806     * find_lowerable_rvalues_visitor.
 807     * Once it found the precision of rvalue can be lowered, then we can
 808     * add conversion f2fmp, etc. through lower_precision_visitor.
 809     */
 810    if (*rvalue == NULL)
 811       return;
 812
 813    struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);
 814
 815    if (!entry)
 816       return;
 817
 818    _mesa_set_remove(lowerable_rvalues, entry);
 819
 820    /* If the entire expression is just a variable dereference then trying to
 821     * lower it will just directly add pointless to and from conversions without
 822     * any actual operation in-between. Although these will eventually get
 823     * optimised out, avoiding generating them here also avoids breaking inout
 824     * parameters to functions.
 825     */
 826    if ((*rvalue)->as_dereference())
 827       return;
 828
 829    lower_precision_visitor v;
 830
 831    (*rvalue)->accept(&v);
 832    v.handle_rvalue(rvalue);
 833
 834    /* We don’t need to add the final conversion if the final type has been
 835     * converted to bool
 836     */
 837    if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) {
 838       *rvalue = convert_precision(true, *rvalue);
 839    }
 840 }
 841
 842 ir_visitor_status
 843 find_precision_visitor::visit_enter(ir_call *ir)
 844 {
 845    ir_rvalue_enter_visitor::visit_enter(ir);
 846
 847    ir_variable *return_var =
 848       ir->return_deref ? ir->return_deref->variable_referenced() : NULL;
 849
 850    /* Don't do anything for image_load here. We have only changed the return
 851     * value to mediump/lowp, so that following instructions can use reduced
 852     * precision.
 853     *
 854     * The return value type of the intrinsic itself isn't changed here, but
 855     * can be changed in NIR if all users use the *2*mp opcode.
 856     */
 857    if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
 858       return visit_continue;
 859
 860    /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
 861     * overrode the precision of the temporary return variable, then we can
 862     * replace the builtin implementation with a lowered version.
 863     */
 864
 865    if (!ir->callee->is_builtin() ||
 866        return_var == NULL ||
 867        (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
 868         return_var->data.precision != GLSL_PRECISION_LOW))
 869       return visit_continue;
 870
 871    ir->callee = map_builtin(ir->callee);
 872    ir->generate_inline(ir);
 873    ir->remove();
 874
 875    return visit_continue_with_parent;
 876 }
 877
 878 ir_function_signature *
 879 find_precision_visitor::map_builtin(ir_function_signature *sig)
 880 {
 881    if (lowered_builtins == NULL) {
 882       lowered_builtins = _mesa_pointer_hash_table_create(NULL);
 883       clone_ht =_mesa_pointer_hash_table_create(NULL);
 884       lowered_builtin_mem_ctx = ralloc_context(NULL);
 885    } else {
 886       struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
 887       if (entry)
 888          return (ir_function_signature *) entry->data;
 889    }
 890
 891    ir_function_signature *lowered_sig =
 892       sig->clone(lowered_builtin_mem_ctx, clone_ht);
 893
 894    /* Functions that always return mediump or lowp should keep their
 895     * parameters intact, because they can be highp. NIR can lower
 896     * the up-conversion for parameters if needed.
 897     */
 898    if (!function_always_returns_mediump_or_lowp(sig->function_name())) {
 899       foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
 900          param->data.precision = GLSL_PRECISION_MEDIUM;
 901       }
 902    }
 903
 904    lower_precision(options, &lowered_sig->body);
 905
 906    _mesa_hash_table_clear(clone_ht, NULL);
 907
 908    _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);
 909
 910    return lowered_sig;
 911 }
 912
 913 find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
 914    : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
 915      lowered_builtins(NULL),
 916      clone_ht(NULL),
 917      lowered_builtin_mem_ctx(NULL),
 918      options(options)
 919 {
 920 }
 921
 922 find_precision_visitor::~find_precision_visitor()
 923 {
 924    _mesa_set_destroy(lowerable_rvalues, NULL);
 925
 926    if (lowered_builtins) {
 927       _mesa_hash_table_destroy(lowered_builtins, NULL);
 928       _mesa_hash_table_destroy(clone_ht, NULL);
 929       ralloc_free(lowered_builtin_mem_ctx);
 930    }
 931 }
 932
 933 }
 934
 935 void
 936 lower_precision(const struct gl_shader_compiler_options *options,
 937                 exec_list *instructions)
 938 {
 939    find_precision_visitor v(options);
 940
 941    find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);
 942
 943    visit_list_elements(&v, instructions);
 944 }